code cleanup for compute_node_rebase branch

1. Handle SLRU and nonrel files as pageserver pages: upload them via restore_s3 and handle them in the protocol.
2. Parse pg_control to retrieve systemid, lsn and so on. Store it in pagecache.
3. Set up compute node without files: only request a few essential files from pageserver to bootstrap. After that, route ALL I/O requests to pageserver. Use the initdb --compute-node flag to create such a minimal node without files, and the GUC 'computenode_mode=true' to request all pages from pageserver.
2026-03-13 21:30:37 +00:00 · 2021-04-09 17:25:41 +03:00 · 2021-04-08 16:01:24 +03:00 · 2021-04-08 15:14:44 +03:00
129 changed files with 18166 additions and 13850 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,267 +0,0 @@
-version: 2.1
-
-orbs:
-  python: circleci/python@1.4.0
-
-executors:
-  zenith-build-executor:
-    resource_class: xlarge
-    docker:
-      - image: cimg/rust:1.51.0
-
-jobs:
-
-  # A job to build postgres
-  build-postgres:
-    executor: zenith-build-executor
-    steps:
-        # Checkout the git repo (circleci doesn't have a flag to enable submodules here)
-      - checkout
-
-        # Grab the postgres git revision to build a cache key.
-        # Note this works even though the submodule hasn't been checkout out yet.
-      - run:
-          name: Get postgres cache key
-          command: |
-            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
-
-      - restore_cache:
-          name: Restore postgres cache
-          keys:
-            # Restore ONLY if the rev key matches exactly
-            - v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
-
-        # FIXME We could cache our own docker container, instead of installing packages every time.
-      - run:
-          name: apt install dependencies
-          command: |
-            if [ ! -e tmp_install/bin/postgres ]; then
-              sudo apt update
-              sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libcurl4-openssl-dev
-            fi
-
-        # Build postgres if the restore_cache didn't find a build.
-        # `make` can't figure out whether the cache is valid, since
-        # it only compares file timestamps.
-      - run:
-          name: build postgres
-          command: |
-            if [ ! -e tmp_install/bin/postgres ]; then
-              # "depth 1" saves some time by not cloning the whole repo
-              git submodule update --init --depth 1
-              make postgres
-            fi
-
-      - save_cache:
-          name: Save postgres cache
-          key: v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
-          paths:
-            - tmp_install
-
-  # A job to build zenith rust code
-  build-zenith:
-    executor: zenith-build-executor
-    parameters:
-      build_type:
-        type: enum
-        enum: ["debug", "release"]
-    steps:
-      - run:
-          name: apt install dependencies
-          command: |
-            sudo apt update
-            sudo apt install libssl-dev clang
-
-        # Checkout the git repo (without submodules)
-      - checkout
-
-        # Grab the postgres git revision to build a cache key.
-        # Note this works even though the submodule hasn't been checkout out yet.
-      - run:
-          name: Get postgres cache key
-          command: |
-            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
-
-      - restore_cache:
-          name: Restore postgres cache
-          keys:
-            # Restore ONLY if the rev key matches exactly
-            - v03-postgres-cache-{{ checksum "/tmp/cache-key-postgres" }}
-
-      - restore_cache:
-          name: Restore rust cache
-          keys:
-            # Require an exact match. While an out of date cache might speed up the build,
-            # there's no way to clean out old packages, so the cache grows every time something
-            # changes.
-            - v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
-
-        # Build the rust code, including test binaries
-      - run:
-          name: Rust build << parameters.build_type >>
-          command: |
-            export CARGO_INCREMENTAL=0
-            BUILD_TYPE="<< parameters.build_type >>"
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              echo "Build in debug mode"
-              cargo build --bins --tests
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              echo "Build in release mode"
-              cargo build --release --bins --tests
-            fi
-
-      - save_cache:
-          name: Save rust cache
-          key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
-          paths:
-            - ~/.cargo/registry
-            - ~/.cargo/git
-            - target
-
-        # Run rust unit tests
-        # FIXME: remove -p zenith_utils once integration tests are moved to python
-      - run: cargo test -p zenith_utils
-
-        # Install the rust binaries, for use by test jobs
-        # `--locked` is required; otherwise, `cargo install` will ignore Cargo.lock.
-        # FIXME: this is a really silly way to install; maybe we should just output
-        # a tarball as an artifact? Or a .deb package?
-      - run:
-          name: cargo install
-          command: |
-            export CARGO_INCREMENTAL=0
-            BUILD_TYPE="<< parameters.build_type >>"
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              echo "Install debug mode"
-              CARGO_FLAGS="--debug"
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              echo "Install release mode"
-              # The default is release mode; there is no --release flag.
-              CARGO_FLAGS=""
-            fi
-            cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path pageserver
-            cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path walkeeper
-            cargo install $CARGO_FLAGS --locked --root /tmp/zenith --path zenith
-
-        # Install the postgres binaries, for use by test jobs
-        # FIXME: this is a silly way to do "install"; maybe just output a standard
-        # postgres package, whatever the favored form is (tarball? .deb package?)
-        # Note that pg_regress needs some build artifacts that probably aren't
-        # in the usual package...?
-      - run:
-          name: postgres install
-          command: |
-            cp -a tmp_install /tmp/zenith/pg_install
-
-        # Save the rust output binaries for other jobs in this workflow.
-      - persist_to_workspace:
-          root: /tmp/zenith
-          paths:
-            - "*"
-
-  run-pytest:
-    #description: "Run pytest"
-    executor: python/default
-    parameters:
-      # pytest args to specify the tests to run.
-      #
-      # This can be a test file name, e.g. 'test_pgbench.py, or a subdirectory,
-      # or '-k foobar' to run tests containing string 'foobar'. See pytest man page
-      # section SPECIFYING TESTS / SELECTING TESTS for details.
-      #
-      # Select the type of Rust build. Must be "release" or "debug".
-      build_type:
-        type: string
-        default: "debug"
-      # This parameter is required, to prevent the mistake of running all tests in one job.
-      test_selection:
-        type: string
-        default: ""
-      # Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
-      extra_params:
-        type: string
-        default: ""
-      needs_postgres_source:
-        type: boolean
-        default: false
-    steps:
-      - attach_workspace:
-          at: /tmp/zenith
-      - checkout
-      - when:
-          condition: << parameters.needs_postgres_source >>
-          steps:
-            - run: git submodule update --init --depth 1
-      - run: pip install pytest psycopg2
-      - run:
-          name: Run pytest
-          working_directory: test_runner
-          environment:
-            - ZENITH_BIN: /tmp/zenith/bin
-            - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
-            - TEST_OUTPUT: /tmp/test_output
-          command: |
-            TEST_SELECTION="<< parameters.test_selection >>"
-            EXTRA_PARAMS="<< parameters.extra_params >>"
-            if [ -z "$TEST_SELECTION" ]; then
-              echo "test_selection must be set"
-              exit 1
-            fi
-            # Run the tests.
-            #
-            # The junit.xml file allows CircleCI to display more fine-grained test information
-            # in its "Tests" tab in the results page.
-            pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short $TEST_SELECTION $EXTRA_PARAMS
-      - run:
-          # CircleCI artifacts are preserved one file at a time, so skipping
-          # this step isn't a good idea. If you want to extract the
-          # pageserver state, perhaps a tarball would be a better idea.
-          name: Delete pageserver data
-          when: always
-          command: |
-            du -sh /tmp/test_output/*
-            for DIR in /tmp/test_output/*; do
-              mv $DIR/repo/pageserver.log $DIR/ || true # ignore errors
-              for PGDIR in $DIR/repo/pgdatadirs/pg?; do
-                echo "PGDIR: $PGDIR"
-                NEW_LOG="${PGDIR##*/}_log"
-                mv $PGDIR/log "$DIR/$NEW_LOG" || true # ignore errors
-              done
-              echo "rm $DIR/repo"
-              rm -rf $DIR/repo
-            done
-            du -sh /tmp/test_output/*
-      - store_artifacts:
-          path: /tmp/test_output
-      # The store_test_results step tells CircleCI where to find the junit.xml file.
-      - store_test_results:
-          path: /tmp/test_output
-
-workflows:
-  build_and_test:
-    jobs:
-      - build-postgres
-      - build-zenith:
-          name: build-zenith-<< matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          requires:
-            - build-postgres
-      - run-pytest:
-          name: pg_regress tests << matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          test_selection: batch_pg_regress
-          needs_postgres_source: true
-          requires:
-            - build-zenith-<< matrix.build_type >>
-      - run-pytest:
-          name: other tests << matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          test_selection: batch_others
-          requires:
-            - build-zenith-<< matrix.build_type >>
--- a/.github/workflows/notifications.yml
+++ b/.github/workflows/notifications.yml
@@ -1,45 +0,0 @@
-name: Send Notifications
-
-on:
-  push:
-    branches: [ main ]
-
-jobs:
-  send-notifications:
-    timeout-minutes: 30
-    name: send commit notifications
-    runs-on: ubuntu-latest
-
-    steps:
-
-      - name: Checkout
-        uses: actions/checkout@v2
-        with:
-          submodules: true
-          fetch-depth: 2
-
-      - name: Form variables for notification message
-        id: git_info_grab
-        run: |
-          git_stat=$(git show --stat=50)
-          git_stat="${git_stat//'%'/'%25'}"
-          git_stat="${git_stat//$'\n'/'%0A'}"
-          git_stat="${git_stat//$'\r'/'%0D'}"
-          git_stat="${git_stat// / }" # space -> 'Space En', as github tends to eat ordinary spaces
-          echo "::set-output name=git_stat::$git_stat"
-          echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
-          echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
-
-      - name: Send notification
-        uses: appleboy/telegram-action@master
-        with:
-          to: ${{ secrets.TELEGRAM_TO }}
-          token: ${{ secrets.TELEGRAM_TOKEN }}
-          format: markdown
-          args: |
-            *@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
-
-            ```
-            ${{ steps.git_info_grab.outputs.git_stat }}
-            ```
-
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -1,36 +1,44 @@
-name: Build and Test
+name: regression check

-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
+on: [push]

 jobs:
  regression-check:
-    strategy:
-      matrix:
-        # If we want to duplicate this job for different
-        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
-        rust_toolchain: [stable]
-        os: [ubuntu-latest]
-    timeout-minutes: 30
    name: run regression test suite
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest

    steps:
+
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: true
          fetch-depth: 2

-      - name: install rust toolchain ${{ matrix.rust_toolchain }}
-        uses: actions-rs/toolchain@v1
+      - name: Form variables for notification message
+        id: git_info_grab
+        run: |
+          git_stat=$(git show --stat=50)
+          git_stat="${git_stat//'%'/'%25'}"
+          git_stat="${git_stat//$'\n'/'%0A'}"
+          git_stat="${git_stat//$'\r'/'%0D'}"
+          git_stat="${git_stat// / }" # space -> 'Space En', as github tends to eat ordinary spaces
+          echo "::set-output name=git_stat::$git_stat"
+          echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
+          echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
+
+      - name: Send notification
+        uses: appleboy/telegram-action@master
        with:
-          profile: minimal
-          toolchain: ${{ matrix.rust_toolchain }}
-          override: true
+          to: ${{ secrets.TELEGRAM_TO }}
+          token: ${{ secrets.TELEGRAM_TOKEN }}
+          format: markdown
+          args: |
+            *@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
+
+            ```
+            ${{ steps.git_info_grab.outputs.git_stat }}
+            ```

      - name: Install postgres dependencies
        run: |
@@ -52,7 +60,11 @@ jobs:
      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
-          make postgres
+          ./pgbuild.sh
+
+      - name: Install rust
+        run: |
+          sudo apt install -y cargo

      - name: Cache cargo deps
        id: cache_cargo
@@ -64,10 +76,13 @@ jobs:
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

-      - name: Run cargo build
+      # That build is only to build dependencies and can be skipped if Cargo.lock
+      # wasn't changed. Next steps need their own build
+      - name: Install cargo deps
+        if: steps.cache_cargo.outputs.cache-hit != 'true'
        run: |
-          cargo build --workspace --bins --examples --tests
+          cargo build

-      - name: Run cargo test
+      - name: Run test
        run: |
-          cargo test -- --nocapture --test-threads=1
+          cargo test --test test_pageserver -- --nocapture --test-threads=1
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,3 @@
 /target
 /tmp_check
 /tmp_install
-/tmp_check_cli
-__pycache__/
-test_output/
-.vscode
-/.zenith
-/integration_tests/.zenith
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,9 +3,4 @@ members = [
    "integration_tests",
    "pageserver",
    "walkeeper",
-    "zenith",
-    "control_plane",
-    "postgres_ffi",
-    "zenith_utils",
-    "workspace_hack",
 ]
--- a/57
+++ b/57
@@ -1,57 +0,0 @@
-#
-# Top level Makefile to build Zenith and PostgreSQL
-#
-all: zenith postgres
-
-# We don't want to run 'cargo build' in parallel with the postgres build,
-# because interleaving cargo build output with postgres build output looks
-# confusing. Also, 'cargo build' is parallel on its own, so it would be too
-# much parallelism. (Recursive invocation of postgres target still gets any
-# '-j' flag from the command line, so 'make -j' is still useful.)
-.NOTPARALLEL:
-
-### Zenith Rust bits
-#
-# The 'postgres_ffi' depends on the Postgres headers.
-zenith: postgres-headers
-	cargo build
-
-### PostgreSQL parts
-tmp_install/build/config.status:
-	+@echo "Configuring postgres build"
-	mkdir -p tmp_install/build
-	(cd tmp_install/build && \
-	../../vendor/postgres/configure CFLAGS='-O0' --enable-debug --enable-cassert \
-	    --enable-depend --with-libxml --prefix=$(abspath tmp_install) > configure.log)
-
-# nicer alias for running 'configure'
-postgres-configure: tmp_install/build/config.status
-
-# Install the PostgreSQL header files into tmp_install/include
-postgres-headers: postgres-configure
-	+@echo "Installing PostgreSQL headers"
-	$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
-
-
-# Compile and install PostgreSQL and contrib/zenith
-postgres: postgres-configure
-	+@echo "Compiling PostgreSQL"
-	$(MAKE) -C tmp_install/build MAKELEVEL=0 install
-	+@echo "Compiling contrib/zenith"
-	(cd vendor/postgres/contrib/zenith && \
-	$(MAKE) PG_CONFIG=$(abspath tmp_install)/bin/pg_config install USE_PGXS=1)
-
-postgres-clean:
-	$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
-
-# This doesn't remove the effects of 'configure'.
-clean:
-	cd tmp_install/build && ${MAKE} clean
-	cargo clean
-
-# This removes everything
-distclean:
-	rm -rf tmp_install
-	cargo clean
-
-.PHONY: postgres-configure postgres postgres-headers zenith
--- a/README.md
+++ b/README.md
@@ -2,88 +2,12 @@

 Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes

-## Running local installation
-
-1. Build zenith and patched postgres
-```sh
-git clone --recursive https://github.com/libzenith/zenith.git
-cd zenith
-make -j5
-```
-
-2. Start pageserver and postgres on top of it (should be called from repo root):
-```sh
-# Create repository in .zenith with proper paths to binaries and data
-# Later that would be responsibility of a package install script
-> ./target/debug/zenith init
-<...>
-new zenith repository was created in .zenith
-
-# start pageserver
-> ./target/debug/zenith start
-Starting pageserver at '127.0.0.1:64000' in .zenith
-Pageserver started
-
-# start postgres on top on the pageserver
-> ./target/debug/zenith pg start main
-Starting postgres node at 'host=127.0.0.1 port=55432 user=stas'
-waiting for server to start.... done
-
-# check list of running postgres instances
-> ./target/debug/zenith pg list
-BRANCH	ADDRESS		LSN		STATUS
-main	127.0.0.1:55432	0/1609610	running
-```
-
-3. Now it is possible to connect to postgres and run some queries:
-```sh
-> psql -p55432 -h 127.0.0.1 postgres
-postgres=# CREATE TABLE t(key int primary key, value text);
-CREATE TABLE
-postgres=# insert into t values(1,1);
-INSERT 0 1
-postgres=# select * from t;
- key | value
-----+-------
-   1 | 1
-(1 row)
-```
-
-4. And create branches and run postgres on them:
-```sh
-# create branch named migration_check
-> ./target/debug/zenith branch migration_check main
-Created branch 'migration_check' at 0/1609610
-
-# check branches tree
-> ./target/debug/zenith branch
- main
- ┗━ @0/1609610: migration_check
-
-# start postgres on that branch
-> ./target/debug/zenith pg start migration_check
-Starting postgres node at 'host=127.0.0.1 port=55433 user=stas'
-waiting for server to start.... done
-
-# this new postgres instance will have all the data from 'main' postgres,
-# but all modifications would not affect data in original postgres
-> psql -p55433 -h 127.0.0.1 postgres
-postgres=# select * from t;
- key | value 
-----+-------
-   1 | 1
-(1 row)
-
-postgres=# insert into t values(2,2);
-INSERT 0 1
-```
-
 ## Running tests

 ```sh
 git clone --recursive https://github.com/libzenith/zenith.git
-make # builds also postgres and installs it to ./tmp_install
-pytest
+./pgbuild.sh # builds postgres and installs it to ./tmp_install
+cargo test -- --test-threads=1
 ```

 ## Source tree layout
@@ -102,6 +26,11 @@ Depends on the modified 'postgres' binary for WAL redo.

 Tests with different combinations of a Postgres compute node, WAL safekeeper and Page Server.

+/mgmt-console:
+
+Web UI to launch (modified) Postgres servers, using S3 as the backing store. Written in Python.
+This is somewhat outdated, as it doesn't use the WAL safekeeper or Page Servers.
+
 /vendor/postgres:

 PostgreSQL source tree, with the modifications needed for Zenith.
--- a/cli-v2-story.md
+++ b/cli-v2-story.md
@@ -1,188 +0,0 @@
-Create a new Zenith repository in the current directory:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
-    The files belonging to this database system will be owned by user "heikki".
-    This user must also own the server process.
-    
-    The database cluster will be initialized with locale "en_GB.UTF-8".
-    The default database encoding has accordingly been set to "UTF8".
-    The default text search configuration will be set to "english".
-    
-    Data page checksums are disabled.
-    
-    creating directory tmp ... ok
-    creating subdirectories ... ok
-    selecting dynamic shared memory implementation ... posix
-    selecting default max_connections ... 100
-    selecting default shared_buffers ... 128MB
-    selecting default time zone ... Europe/Helsinki
-    creating configuration files ... ok
-    running bootstrap script ... ok
-    performing post-bootstrap initialization ... ok
-    syncing data to disk ... ok
-    
-    initdb: warning: enabling "trust" authentication for local connections
-    You can change this by editing pg_hba.conf or using the option -A, or
-    --auth-local and --auth-host, the next time you run initdb.
-    new zenith repository was created in .zenith
-
-Initially, there is only one branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
-      main
-
-Start a local Postgres instance on the branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:27:43.920 EEST [984664] LOG:  listening on IPv6 address "::1", port 5432
-    2021-04-13 09:27:43.920 EEST [984664] LOG:  listening on IPv4 address "127.0.0.1", port 5432
-    2021-04-13 09:27:43.927 EEST [984664] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5432"
-    2021-04-13 09:27:43.939 EEST [984665] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:27:43.939 EEST [984665] LOG:  creating missing WAL directory "pg_wal/archive_status"
-    2021-04-13 09:27:44.189 EEST [984665] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:27:44.195 EEST [984665] LOG:  invalid record length at 0/15FFB80: wanted 24, got 0
-    2021-04-13 09:27:44.195 EEST [984665] LOG:  redo is not required
-    2021-04-13 09:27:44.225 EEST [984664] LOG:  database system is ready to accept connections
-     done
-    server started
-
-Run some commands against it:
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);" 
-    CREATE TABLE
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');" 
-    INSERT 0 1
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-
-Create a new branch called 'experimental'. We create it from the
-current end of the 'main' branch, but you could specify a different
-LSN as the start point instead.
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
-    branching at end of WAL: 0/161F478
-    
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch 
-      experimental
-      main
-
-Start another Postgres instance off the 'experimental' branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:28:41.875 EEST [984766] LOG:  listening on IPv6 address "::1", port 5433
-    2021-04-13 09:28:41.875 EEST [984766] LOG:  listening on IPv4 address "127.0.0.1", port 5433
-    2021-04-13 09:28:41.883 EEST [984766] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5433"
-    2021-04-13 09:28:41.896 EEST [984767] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:28:42.265 EEST [984767] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:28:42.269 EEST [984767] LOG:  redo starts at 0/15FFB80
-    2021-04-13 09:28:42.272 EEST [984767] LOG:  invalid record length at 0/161F4B0: wanted 24, got 0
-    2021-04-13 09:28:42.272 EEST [984767] LOG:  redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
-    2021-04-13 09:28:42.321 EEST [984766] LOG:  database system is ready to accept connections
-     done
-    server started
-
-Insert some a row on the 'experimental' branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-    
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')" 
-    INSERT 0 1
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-     inserted on experimental
-    (2 rows)
-    
-See that the other Postgres instance is still running on 'main' branch on port 5432:
-
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-
-
-
-
-Everything is stored in the .zenith directory:
-
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
-    total 12
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
-
-The 'datadirs' directory contains the datadirs of the running instances:
-
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
-    total 8
-    drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
-    drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
-    total 124
-    drwxr-xr-x 5 heikki heikki  4096 Apr 13 09:27 base
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 global
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_commit_ts
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_dynshmem
-    -rw------- 1 heikki heikki  4760 Apr 13 09:27 pg_hba.conf
-    -rw------- 1 heikki heikki  1636 Apr 13 09:27 pg_ident.conf
-    drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:32 pg_logical
-    drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:27 pg_multixact
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_notify
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_replslot
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_serial
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_snapshots
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_stat
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:34 pg_stat_tmp
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_subtrans
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_tblspc
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_twophase
-    -rw------- 1 heikki heikki     3 Apr 13 09:27 PG_VERSION
-    lrwxrwxrwx 1 heikki heikki    52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_xact
-    -rw------- 1 heikki heikki    88 Apr 13 09:27 postgresql.auto.conf
-    -rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-    -rw------- 1 heikki heikki    96 Apr 13 09:27 postmaster.opts
-    -rw------- 1 heikki heikki   149 Apr 13 09:27 postmaster.pid
-
-Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
-datadir is ephemeral, you can delete it at any time, and it can be reconstructed
-from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
-the repository, the 'datadirs' are not included. (They are like git working trees)
-
-    ~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
-    ~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:37:05.477 EEST [985340] LOG:  listening on IPv6 address "::1", port 5433
-    2021-04-13 09:37:05.477 EEST [985340] LOG:  listening on IPv4 address "127.0.0.1", port 5433
-    2021-04-13 09:37:05.487 EEST [985340] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5433"
-    2021-04-13 09:37:05.498 EEST [985341] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:37:05.808 EEST [985341] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:37:05.813 EEST [985341] LOG:  redo starts at 0/15FFB80
-    2021-04-13 09:37:05.815 EEST [985341] LOG:  invalid record length at 0/161F770: wanted 24, got 0
-    2021-04-13 09:37:05.815 EEST [985341] LOG:  redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
-    2021-04-13 09:37:05.866 EEST [985340] LOG:  database system is ready to accept connections
-     done
-    server started
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-     inserted on experimental
-    (2 rows)
-
--- a/control_plane/.gitignore
+++ b/control_plane/.gitignore
@@ -1 +0,0 @@
-tmp_check/
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -1,30 +0,0 @@
-[package]
-name = "control_plane"
-version = "0.1.0"
-authors = ["Stas Kelvich <stas@zenith.tech>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-rand = "0.8.3"
-tar = "0.4.33"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1"
-toml = "0.5"
-lazy_static = "1.4"
-regex = "1"
-anyhow = "1.0"
-# hex = "0.4.3"
-bytes = "1.0.1"
-# fs_extra = "1.2.0"
-nix = "0.20"
-# thiserror = "1"
-url = "2.2.2"
-
-pageserver = { path = "../pageserver" }
-walkeeper = { path = "../walkeeper" }
-postgres_ffi = { path = "../postgres_ffi" }
-zenith_utils = { path = "../zenith_utils" }
-workspace_hack = { path = "../workspace_hack" }
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -1,467 +0,0 @@
-use std::io::Write;
-use std::net::SocketAddr;
-use std::net::TcpStream;
-use std::os::unix::fs::PermissionsExt;
-use std::process::Command;
-use std::sync::Arc;
-use std::time::Duration;
-use std::{collections::BTreeMap, path::PathBuf};
-use std::{
-    fs::{self, OpenOptions},
-    io::Read,
-};
-
-use anyhow::{Context, Result};
-use lazy_static::lazy_static;
-use regex::Regex;
-
-use crate::local_env::LocalEnv;
-use pageserver::ZTimelineId;
-
-use crate::storage::PageServerNode;
-
-//
-// ComputeControlPlane
-//
-/// Registry of the compute (Postgres) nodes managed for one local environment.
-pub struct ComputeControlPlane {
-    // Fallback for `get_port` when no node exists yet; a new node gets the highest known port + 1.
-    base_port: u16,
-    // Page server handle shared with every node created here.
-    pageserver: Arc<PageServerNode>,
-    // Known nodes, keyed by node name.
-    pub nodes: BTreeMap<String, Arc<PostgresNode>>,
-    // Local environment, cloned into each node.
-    env: LocalEnv,
-}
-
-impl ComputeControlPlane {
-    /// Build the control plane from the data directories already on disk:
-    /// every entry under `pg_data_dirs_path()` is parsed into a `PostgresNode`.
-    pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
-        // TODO: since pageserver do not have config file yet we believe here that
-        // it is running on default port. Change that when pageserver will have config.
-        let pageserver = Arc::new(PageServerNode::from_env(&env));
-
-        let datadirs_root = env.pg_data_dirs_path();
-        let dir_entries = fs::read_dir(&datadirs_root)
-            .with_context(|| format!("failed to list {}", datadirs_root.display()))?;
-
-        // Stop at the first entry that cannot be read or parsed.
-        let mut nodes = BTreeMap::new();
-        for dir_entry in dir_entries {
-            let node = PostgresNode::from_dir_entry(dir_entry?, &env, &pageserver)?;
-            nodes.insert(node.name.clone(), Arc::new(node));
-        }
-
-        Ok(ComputeControlPlane {
-            base_port: 55431,
-            pageserver,
-            nodes,
-            env,
-        })
-    }
-
-    /// Next port to hand out: one above the highest port used by a known
-    /// node, or `base_port + 1` when there are no nodes yet.
-    fn get_port(&mut self) -> u16 {
-        let highest = self.nodes.values().map(|node| node.address.port()).max();
-        match highest {
-            Some(port) => 1 + port,
-            None => 1 + self.base_port,
-        }
-    }
-
-    /// Create an empty control plane (no nodes) that shares `pageserver` and
-    /// keeps its own clone of `local_env`.
-    // NOTE(review): `base_port` is 65431 here but 55431 in `load` — confirm the difference is intended.
-    pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
-        ComputeControlPlane {
-            base_port: 65431,
-            pageserver: Arc::clone(pageserver),
-            nodes: BTreeMap::new(),
-            env: local_env.clone(),
-        }
-    }
-
-    /// Register a new node called `name` on timeline `timelineid` and populate
-    /// its data directory through `PostgresNode::init_from_page_server`.
-    ///
-    /// The node listens on 127.0.0.1 at the port returned by `get_port`. It is
-    /// added to `self.nodes` only after initialization succeeded.
-    pub fn new_from_page_server(
-        &mut self,
-        is_test: bool,
-        timelineid: ZTimelineId,
-        name: &str,
-    ) -> Result<Arc<PostgresNode>> {
-        let port = self.get_port();
-        let address = SocketAddr::new("127.0.0.1".parse().unwrap(), port);
-
-        let created = Arc::new(PostgresNode {
-            name: name.to_owned(),
-            address,
-            env: self.env.clone(),
-            pageserver: Arc::clone(&self.pageserver),
-            is_test,
-            timelineid,
-        });
-        created.init_from_page_server()?;
-
-        let key = created.name.clone();
-        self.nodes.insert(key, Arc::clone(&created));
-
-        Ok(created)
-    }
-
-    /// Test helper: create a test node for `branch_name` and configure it to
-    /// stream WAL directly to the pageserver. Panics on any failure.
-    pub fn new_test_node(&mut self, branch_name: &str) -> Arc<PostgresNode> {
-        let branch_timeline = self
-            .pageserver
-            .branch_get_by_name(branch_name)
-            .expect("failed to get timeline_id")
-            .timeline_id;
-
-        let node = self
-            .new_from_page_server(true, branch_timeline, branch_name)
-            .unwrap();
-
-        // Configure the node to stream WAL directly to the pageserver
-        // FIXME escaping
-        let extra_conf = format!(
-            "shared_preload_libraries = zenith\n\
-            zenith.callmemaybe_connstring = '{}'\n",
-            node.connstr()
-        );
-        node.append_conf("postgresql.conf", &extra_conf).unwrap();
-
-        node
-    }
-
-    /// Test helper: create a test node for `branch_name` whose
-    /// `synchronous_standby_names` is set to `safekeeper_proxy`.
-    /// Panics on any failure.
-    pub fn new_test_master_node(&mut self, branch_name: &str) -> Arc<PostgresNode> {
-        let branch_timeline = self
-            .pageserver
-            .branch_get_by_name(branch_name)
-            .expect("failed to get timeline_id")
-            .timeline_id;
-        let node = self
-            .new_from_page_server(true, branch_timeline, branch_name)
-            .unwrap();
-
-        let extra_conf = "synchronous_standby_names = 'safekeeper_proxy'\n";
-        node.append_conf("postgresql.conf", extra_conf).unwrap();
-
-        node
-    }
-
-    /// Create a regular (non-test) node for `branch_name` and configure it to
-    /// stream WAL directly to the pageserver. Errors are returned to the caller.
-    pub fn new_node(&mut self, branch_name: &str) -> Result<Arc<PostgresNode>> {
-        let branch_timeline = self.pageserver.branch_get_by_name(branch_name)?.timeline_id;
-        let node = self.new_from_page_server(false, branch_timeline, branch_name)?;
-
-        // Configure the node to stream WAL directly to the pageserver
-        // FIXME escaping
-        let extra_conf = format!(
-            "shared_preload_libraries = zenith\n\
-            zenith.callmemaybe_connstring = '{}'\n",
-            node.connstr()
-        );
-        node.append_conf("postgresql.conf", extra_conf.as_str())?;
-
-        Ok(node)
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-/// A single compute Postgres instance and the handles needed to manage it.
-pub struct PostgresNode {
-    // Address the instance listens on; written to postgresql.conf as listen_addresses/port.
-    pub address: SocketAddr,
-    // Node name; also the name of its data directory (see `pgdata`).
-    name: String,
-    pub env: LocalEnv,
-    // Page server the base backup is fetched from.
-    pageserver: Arc<PageServerNode>,
-    // Test nodes wipe their data directory before init and are stopped and destroyed on drop.
-    is_test: bool,
-    pub timelineid: ZTimelineId,
-}
-
-impl PostgresNode {
-    /// Reconstruct a node from an existing data directory.
-    ///
-    /// The directory name becomes the node name; the port and the timeline id
-    /// are read back from the `postgresql.conf` inside it. The resulting node
-    /// is never marked as a test node.
-    ///
-    /// Fails if `entry` is not a directory, its name is not valid UTF-8, the
-    /// config file cannot be read, or either setting is missing or unparsable.
-    fn from_dir_entry(
-        entry: std::fs::DirEntry,
-        env: &LocalEnv,
-        pageserver: &Arc<PageServerNode>,
-    ) -> Result<PostgresNode> {
-        if !entry.file_type()?.is_dir() {
-            anyhow::bail!(
-                "PostgresNode::from_dir_entry failed: '{}' is not a directory",
-                entry.path().display()
-            );
-        }
-
-        lazy_static! {
-            static ref CONF_PORT_RE: Regex = Regex::new(r"(?m)^\s*port\s*=\s*(\d+)\s*$").unwrap();
-            // The dot is escaped so that only the literal setting name matches.
-            static ref CONF_TIMELINE_RE: Regex =
-                Regex::new(r"(?m)^\s*zenith\.zenith_timeline\s*=\s*'(\w+)'\s*$").unwrap();
-        }
-
-        // parse data directory name; report a bad name instead of panicking
-        let name = entry
-            .file_name()
-            .into_string()
-            .map_err(|raw| anyhow::anyhow!("data directory name {:?} is not valid UTF-8", raw))?;
-
-        // read the config file
-        let cfg_path = entry.path().join("postgresql.conf");
-        let config = fs::read_to_string(&cfg_path)
-            .with_context(|| format!("failed to read config file in {}", cfg_path.display()))?;
-
-        // Value of capture group 1 on the first line of the config matching `re`.
-        let setting = |re: &Regex, what: &str| -> Result<String> {
-            re.captures(config.as_str())
-                .and_then(|caps| caps.get(1))
-                .map(|m| m.as_str().to_owned())
-                .ok_or_else(|| {
-                    anyhow::anyhow!(
-                        "failed to find {} definition in config file {}",
-                        what,
-                        cfg_path.display()
-                    )
-                })
-        };
-
-        // parse port
-        let port: u16 = setting(&*CONF_PORT_RE, "port")?
-            .parse()
-            .with_context(|| {
-                format!(
-                    "failed to parse port definition in config file {}",
-                    cfg_path.display()
-                )
-            })?;
-
-        // parse timeline
-        let timelineid: ZTimelineId = setting(&*CONF_TIMELINE_RE, "timeline")?
-            .parse()
-            .with_context(|| {
-                format!(
-                    "failed to parse timeline definition in config file {}",
-                    cfg_path.display()
-                )
-            })?;
-
-        // ok now
-        Ok(PostgresNode {
-            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
-            name,
-            env: env.clone(),
-            pageserver: Arc::clone(pageserver),
-            is_test: false,
-            timelineid,
-        })
-    }
-
-    /// Connect to a page server, get base backup, and untar it to initialize a
-    /// new data directory.
-    ///
-    /// Steps, in order: optionally wipe the old data directory (test nodes
-    /// only), create it with mode 0700, fetch and unpack the `basebackup`
-    /// tarball for this node's timeline, append replication, listen and
-    /// page-server settings to `postgresql.conf`, make sure `pg_wal` and
-    /// `pg_wal/archive_status` exist, and finally run `pg_resetwal -f`.
-    pub fn init_from_page_server(&self) -> Result<()> {
-        let pgdata = self.pgdata();
-        println!(
-            "Extracting base backup to create postgres instance: path={} port={}",
-            pgdata.display(),
-            self.address.port()
-        );
-
-        // initialize data directory
-        // (a missing directory is fine, hence the ignored result)
-        if self.is_test {
-            fs::remove_dir_all(&pgdata).ok();
-        }
-
-        let sql = format!("basebackup {}", self.timelineid);
-        let mut client = self
-            .pageserver
-            .page_server_psql_client()
-            .with_context(|| "connecting to page server failed")?;
-
-        fs::create_dir_all(&pgdata)
-            .with_context(|| format!("could not create data directory {}", pgdata.display()))?;
-        fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
-            || {
-                format!(
-                    "could not set permissions in data directory {}",
-                    pgdata.display()
-                )
-            },
-        )?;
-
-        // FIXME: The compute node should be able to stream the WAL it needs from the WAL safekeepers or archive.
-        // But that's not implemented yet. For now, 'pg_wal' is included in the base backup tarball that
-        // we receive from the Page Server, so we don't need to create the empty 'pg_wal' directory here.
-        //fs::create_dir_all(pgdata.join("pg_wal"))?;
-
-        let mut copyreader = client
-            .copy_out(sql.as_str())
-            .with_context(|| "page server 'basebackup' command failed")?;
-
-        // FIXME: Currently, we slurp the whole tarball into memory, and then extract it,
-        // but we really should do this:
-        //let mut ar = tar::Archive::new(copyreader);
-        let mut buf = vec![];
-        copyreader
-            .read_to_end(&mut buf)
-            .with_context(|| "reading base backup from page server failed")?;
-        let mut ar = tar::Archive::new(buf.as_slice());
-        ar.unpack(&pgdata)
-            .with_context(|| "extracting page backup failed")?;
-
-        // listen for selected port
-        self.append_conf(
-            "postgresql.conf",
-            &format!(
-                "max_wal_senders = 10\n\
-                 max_replication_slots = 10\n\
-                 hot_standby = on\n\
-                 shared_buffers = 1MB\n\
-                 fsync = off\n\
-                 max_connections = 100\n\
-                 wal_sender_timeout = 0\n\
-                 wal_level = replica\n\
-                 listen_addresses = '{address}'\n\
-                 port = {port}\n",
-                address = self.address.ip(),
-                port = self.address.port()
-            ),
-        )?;
-
-        // Never clean up old WAL. TODO: We should use a replication
-        // slot or something proper, to prevent the compute node
-        // from removing WAL that hasn't been streamed to the safekeeper or
-        // page server yet. But this will do for now.
-        self.append_conf("postgresql.conf", "wal_keep_size='10TB'\n")?;
-
-        // Connect it to the page server.
-
-        // Configure that node to take pages from pageserver
-        self.append_conf(
-            "postgresql.conf",
-            &format!(
-                "shared_preload_libraries = zenith \n\
-                 zenith.page_server_connstring = 'host={} port={}'\n\
-                 zenith.zenith_timeline='{}'\n",
-                self.pageserver.address().ip(),
-                self.pageserver.address().port(),
-                self.timelineid
-            ),
-        )?;
-
-        // Make sure the WAL directories exist, then run pg_resetwal in force mode.
-        fs::create_dir_all(self.pgdata().join("pg_wal"))?;
-        fs::create_dir_all(self.pgdata().join("pg_wal").join("archive_status"))?;
-        self.pg_resetwal(&["-f"])?;
-        Ok(())
-    }
-
-    /// Path of this node's data directory: the environment's data-dir root
-    /// joined with the node name.
-    pub fn pgdata(&self) -> PathBuf {
-        self.env.pg_data_dir(&self.name)
-    }
-
-    /// Coarse liveness check based on the presence of `postmaster.pid` and on
-    /// a TCP connect to the node's address with a 300 ms timeout.
-    pub fn status(&self) -> &str {
-        let wait = Duration::from_millis(300);
-        let pidfile_present = self.pgdata().join("postmaster.pid").exists();
-        let reachable = TcpStream::connect_timeout(&self.address, wait).is_ok();
-
-        if pidfile_present && reachable {
-            "running"
-        } else if pidfile_present {
-            "crashed"
-        } else if reachable {
-            "running, no pidfile"
-        } else {
-            "stopped"
-        }
-    }
-
-    /// Append `opts` verbatim to the file `config` inside the data directory.
-    ///
-    /// The file must already exist; it is opened in append mode and never
-    /// created. The path is handed to `open` as-is, so a data directory with a
-    /// non-UTF-8 path yields a normal I/O result instead of a panic.
-    pub fn append_conf(&self, config: &str, opts: &str) -> Result<()> {
-        let mut file = OpenOptions::new()
-            .append(true)
-            .open(self.pgdata().join(config))?;
-        file.write_all(opts.as_bytes())?;
-        Ok(())
-    }
-
-    /// Run `pg_ctl -D <pgdata> -l <pgdata>/log <args...>` from the configured
-    /// postgres distribution.
-    ///
-    /// The child gets a cleared environment with only `LD_LIBRARY_PATH` and
-    /// `DYLD_LIBRARY_PATH` set to the distribution's lib directory. A spawn
-    /// failure or a non-zero exit status is reported as an error.
-    fn pg_ctl(&self, args: &[&str]) -> Result<()> {
-        let pgdata = self.pgdata();
-        let logfile = pgdata.join("log");
-        let libdir = self.env.pg_lib_dir();
-
-        let mut cmd = Command::new(self.env.pg_bin_dir().join("pg_ctl"));
-        cmd.arg("-D")
-            .arg(pgdata.to_str().unwrap())
-            .arg("-l")
-            .arg(logfile.to_str().unwrap())
-            .args(args)
-            .env_clear()
-            .env("LD_LIBRARY_PATH", libdir.to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", libdir.to_str().unwrap());
-
-        let exit = cmd.status().with_context(|| "pg_ctl failed")?;
-        if exit.success() {
-            Ok(())
-        } else {
-            Err(anyhow::Error::msg("pg_ctl failed"))
-        }
-    }
-
-    /// Run `pg_resetwal -D <pgdata> <args...>` from the configured postgres
-    /// distribution, inheriting the current environment. A spawn failure or a
-    /// non-zero exit status is reported as an error.
-    fn pg_resetwal(&self, args: &[&str]) -> Result<()> {
-        let pgdata = self.pgdata();
-
-        let mut cmd = Command::new(self.env.pg_bin_dir().join("pg_resetwal"));
-        cmd.arg("-D").arg(pgdata.to_str().unwrap()).args(args);
-
-        let exit = cmd.status().with_context(|| "pg_resetwal failed")?;
-        if exit.success() {
-            Ok(())
-        } else {
-            Err(anyhow::Error::msg("pg_resetwal failed"))
-        }
-    }
-
-    /// Print the node's connection string and run `pg_ctl start`.
-    pub fn start(&self) -> Result<()> {
-        println!("Starting postgres node at '{}'", self.connstr());
-        self.pg_ctl(&["start"])
-    }
-
-    /// Run `pg_ctl restart` for this node.
-    pub fn restart(&self) -> Result<()> {
-        self.pg_ctl(&["restart"])
-    }
-
-    /// Stop the node with `pg_ctl -m immediate stop`; when `destroy` is set,
-    /// also remove its data directory afterwards.
-    pub fn stop(&self, destroy: bool) -> Result<()> {
-        self.pg_ctl(&["-m", "immediate", "stop"])?;
-        if !destroy {
-            return Ok(());
-        }
-
-        println!(
-            "Destroying postgres data directory '{}'",
-            self.pgdata().to_str().unwrap()
-        );
-        let doomed = self.pgdata();
-        fs::remove_dir_all(&doomed)?;
-        Ok(())
-    }
-
-    /// Connection string of the form `host=<ip> port=<port> user=<whoami>`.
-    /// Note that every call runs the external `whoami` command.
-    pub fn connstr(&self) -> String {
-        format!(
-            "host={} port={} user={}",
-            self.address.ip(),
-            self.address.port(),
-            self.whoami()
-        )
-    }
-
-    /// Name of the current OS user, obtained by running the `whoami` command.
-    /// Panics if the command cannot be run, fails, or prints non-UTF-8 output.
-    // XXX: cache that in control plane
-    pub fn whoami(&self) -> String {
-        let finished = Command::new("whoami")
-            .output()
-            .expect("failed to execute whoami");
-
-        if finished.status.success() {
-            let username = String::from_utf8(finished.stdout).unwrap();
-            username.trim().to_string()
-        } else {
-            panic!("whoami failed")
-        }
-    }
-}
-
-impl Drop for PostgresNode {
-    // destructor to clean up state after test is done
-    // XXX: we may detect failed test by setting some flag in catch_unwind()
-    // and checking it here. But let just clean datadirs on start.
-    fn drop(&mut self) {
-        // Only test nodes are stopped and destroyed on drop.
-        if self.is_test {
-            // Best effort: a failure to stop or remove the directory is ignored.
-            let _ = self.stop(true);
-        }
-    }
-}
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -1,31 +0,0 @@
-//
-// Local control plane.
-//
-// Can start, configure and stop postgres instances running as local processes.
-//
-// Intended to be used in integration tests and in CLI tools for
-// local installations.
-//
-use anyhow::{anyhow, bail, Context, Result};
-use std::fs;
-use std::path::Path;
-
-pub mod compute;
-pub mod local_env;
-pub mod storage;
-
-/// Read a PID file
-///
-/// We expect a file that contains a single integer, optionally surrounded by
-/// whitespace (PID files commonly end with a newline).
-/// We return an i32 for compatibility with libc and nix.
-///
-/// Fails if the file cannot be read, does not contain an integer, or the
-/// value is smaller than 1.
-pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
-    let pid_str = fs::read_to_string(pidfile)
-        .with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
-    // `str::parse` rejects surrounding whitespace, so strip it first.
-    let pid: i32 = pid_str
-        .trim()
-        .parse()
-        .map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
-    if pid < 1 {
-        bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
-    }
-    Ok(pid)
-}
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -1,166 +0,0 @@
-//
-// This module is responsible for locating and loading paths in a local setup.
-//
-// Now it also provides init method which acts like a stub for proper installation
-// script which will use local paths.
-//
-use anyhow::{anyhow, Result};
-use serde::{Deserialize, Serialize};
-use std::fs;
-use std::path::PathBuf;
-use std::{collections::BTreeMap, env};
-use url::Url;
-
-pub type Remotes = BTreeMap<String, String>;
-
-//
-// This data structure represents the deserialized zenith CLI config.
-//
-#[derive(Serialize, Deserialize, Clone)]
-pub struct LocalEnv {
-    // Pageserver connection strings
-    pub pageserver_connstring: String,
-
-    // Base directory for both pageserver and compute nodes
-    pub base_data_dir: PathBuf,
-
-    // Path to postgres distribution. It's expected that "bin", "include",
-    // "lib", "share" from postgres distribution are there. If at some point
-    // in time we will be able to run against vanilla postgres we may split that
-    // to four separate paths and match OS-specific installation layout.
-    pub pg_distrib_dir: PathBuf,
-
-    // Path to pageserver binary. Empty for remote pageserver.
-    pub zenith_distrib_dir: Option<PathBuf>,
-
-    // Named remotes; the CLI changes this set and persists it with `save_config`.
-    pub remotes: Remotes,
-}
-
-impl LocalEnv {
-    // postgres installation paths
-
-    /// `bin` directory of the postgres distribution.
-    pub fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
-    }
-    /// `lib` directory of the postgres distribution.
-    pub fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
-    }
-
-    /// Path of the `pageserver` binary inside the zenith distribution.
-    /// Fails when no distribution directory is configured (remote pageserver).
-    pub fn pageserver_bin(&self) -> Result<PathBuf> {
-        // `ok_or_else` builds the error only when it is actually needed.
-        let distrib_dir = self
-            .zenith_distrib_dir
-            .as_ref()
-            .ok_or_else(|| anyhow!("Can not manage remote pageserver"))?;
-        Ok(distrib_dir.join("pageserver"))
-    }
-
-    /// Directory that holds one data directory per compute node.
-    pub fn pg_data_dirs_path(&self) -> PathBuf {
-        self.base_data_dir.join("pgdatadirs")
-    }
-
-    /// Data directory of the compute node called `name`.
-    pub fn pg_data_dir(&self, name: &str) -> PathBuf {
-        self.pg_data_dirs_path().join(name)
-    }
-
-    // TODO: move pageserver files into ./pageserver
-    /// Data directory of the pageserver; currently the base data directory itself.
-    pub fn pageserver_data_dir(&self) -> PathBuf {
-        self.base_data_dir.clone()
-    }
-}
-
-/// Location of the zenith repository: `$ZENITH_REPO_DIR` when set, otherwise
-/// `.zenith` relative to the current directory.
-///
-/// The variable is converted as an `OsString`, so a value that is not valid
-/// UTF-8 is accepted instead of causing a panic.
-fn base_path() -> PathBuf {
-    match std::env::var_os("ZENITH_REPO_DIR") {
-        Some(val) => PathBuf::from(val),
-        None => ".zenith".into(),
-    }
-}
-
-//
-// Initialize a new Zenith repository
-//
-// With `remote_pageserver` set, the repository points at that address and no
-// local pageserver binary is required. Otherwise the `pageserver` binary is
-// expected next to the current executable.
-//
-// All checks run before anything is written, so a failed `init` does not
-// leave a half-created repository behind that would block the next attempt
-// with "already exists".
-//
-pub fn init(remote_pageserver: Option<&str>) -> Result<()> {
-    // check if config already exists
-    let base_path = base_path();
-    if base_path.exists() {
-        anyhow::bail!(
-            "{} already exists. Perhaps already initialized?",
-            base_path.display()
-        );
-    }
-
-    // ok, now check that expected binaries are present
-
-    // Find postgres binaries. Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
-    let pg_distrib_dir: PathBuf = {
-        if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
-            postgres_bin.into()
-        } else {
-            let cwd = env::current_dir()?;
-            cwd.join("tmp_install")
-        }
-    };
-    if !pg_distrib_dir.join("bin/postgres").exists() {
-        anyhow::bail!("Can't find postgres binary at {:?}", pg_distrib_dir);
-    }
-
-    // Work out how to reach the pageserver, still without touching the disk.
-    let (pageserver_connstring, zenith_distrib_dir) = if let Some(addr) = remote_pageserver {
-        // check that addr is parsable
-        let _uri = Url::parse(addr).map_err(|e| anyhow!("{}: {}", addr, e))?;
-
-        (format!("postgresql://{}/", addr), None)
-    } else {
-        // Find zenith binaries.
-        let zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
-        if !zenith_distrib_dir.join("pageserver").exists() {
-            anyhow::bail!("Can't find pageserver binary.",);
-        }
-
-        // Same default port as `PageServerNode::address`.
-        (
-            "postgresql://127.0.0.1:64000".to_string(),
-            Some(zenith_distrib_dir),
-        )
-    };
-
-    // Everything checked out: create the repository layout.
-    fs::create_dir(&base_path)?;
-    fs::create_dir(base_path.join("pgdatadirs"))?;
-
-    let conf = LocalEnv {
-        pageserver_connstring,
-        pg_distrib_dir,
-        zenith_distrib_dir,
-        base_data_dir: base_path,
-        remotes: BTreeMap::default(),
-    };
-
-    let toml = toml::to_string_pretty(&conf)?;
-    fs::write(conf.base_data_dir.join("config"), toml)?;
-
-    Ok(())
-}
-
-// Locate the repository via `base_path()` and parse its `config` file (TOML)
-// into a `LocalEnv`. Fails when the repository directory does not exist or the
-// file cannot be read or parsed.
-pub fn load_config() -> Result<LocalEnv> {
-    let repo_dir = base_path();
-
-    if repo_dir.exists() {
-        // TODO: check that it looks like a zenith repository
-
-        // load and parse file
-        let raw = fs::read_to_string(repo_dir.join("config"))?;
-        let parsed: LocalEnv = toml::from_str(raw.as_str())?;
-        return Ok(parsed);
-    }
-
-    anyhow::bail!(
-        "Zenith config is not found in {}. You need to run 'zenith init' first",
-        repo_dir.to_str().unwrap()
-    );
-}
-
-// Save config. We use that to change set of remotes from CLI itself.
-// The config is serialized as pretty TOML and written to `config` under
-// `base_path()`, replacing the previous contents.
-pub fn save_config(conf: &LocalEnv) -> Result<()> {
-    let config_path = base_path().join("config");
-    let conf_str = toml::to_string_pretty(conf)?;
-
-    fs::write(config_path, conf_str)?;
-    Ok(())
-}
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,252 +0,0 @@
-use std::collections::HashMap;
-use std::net::{SocketAddr, TcpStream};
-use std::path::PathBuf;
-use std::process::Command;
-use std::thread;
-use std::time::Duration;
-
-use anyhow::{anyhow, bail, Result};
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
-use postgres::{Client, NoTls};
-
-use crate::local_env::LocalEnv;
-use crate::read_pidfile;
-use pageserver::branches::BranchInfo;
-
-//
-// Control routines for pageserver.
-//
-// Used in CLI and tests.
-//
-pub struct PageServerNode {
-    // When set, dropping this value stops the pageserver.
-    pub kill_on_exit: bool,
-    // Address the pageserver listens on; `None` means the default (see `address`).
-    pub listen_address: Option<SocketAddr>,
-    pub env: LocalEnv,
-}
-
-impl PageServerNode {
-    /// Handle for a pageserver on the default address that is left running
-    /// when the handle is dropped. Keeps its own clone of `env`.
-    pub fn from_env(env: &LocalEnv) -> PageServerNode {
-        PageServerNode {
-            kill_on_exit: false,
-            listen_address: None, // default
-            env: env.clone(),
-        }
-    }
-
-    /// Address of the pageserver: `listen_address` when set, otherwise
-    /// 127.0.0.1:64000.
-    pub fn address(&self) -> SocketAddr {
-        self.listen_address
-            .unwrap_or_else(|| "127.0.0.1:64000".parse().unwrap())
-    }
-
-    /// Run `pageserver --init -D <base_data_dir>` and wait for it to finish.
-    ///
-    /// The child gets a cleared environment containing only the variables set
-    /// below. Both a failure to launch the binary and a non-zero exit status
-    /// are returned as errors; this function does not panic on spawn failure.
-    pub fn init(&self) -> Result<()> {
-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-        let status = cmd
-            .args(&["--init", "-D", self.env.base_data_dir.to_str().unwrap()])
-            .env_clear()
-            .env("RUST_BACKTRACE", "1")
-            .env(
-                "POSTGRES_DISTRIB_DIR",
-                self.env.pg_distrib_dir.to_str().unwrap(),
-            )
-            .env("ZENITH_REPO_DIR", self.repo_path())
-            .env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postgres-wal-redo binary
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .status()
-            .map_err(|e| anyhow!("pageserver init failed: {}", e))?;
-
-        if status.success() {
-            Ok(())
-        } else {
-            Err(anyhow!("pageserver init failed"))
-        }
-    }
-
-    /// Directory the pageserver keeps its files in (the environment's
-    /// pageserver data directory).
-    pub fn repo_path(&self) -> PathBuf {
-        self.env.pageserver_data_dir()
-    }
-
-    /// Path of `pageserver.pid` inside `repo_path()`.
-    pub fn pid_file(&self) -> PathBuf {
-        self.repo_path().join("pageserver.pid")
-    }
-
-    /// Launch the pageserver with `-l <address> -D <repo_path> -d` and wait
-    /// until it accepts connections.
-    ///
-    /// Fails if the launcher exits unsuccessfully, or if the pageserver still
-    /// does not answer after the retry loop below. Success is reported only
-    /// once a connection was actually established.
-    pub fn start(&self) -> Result<()> {
-        println!(
-            "Starting pageserver at '{}' in {}",
-            self.address(),
-            self.repo_path().display()
-        );
-
-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-        cmd.args(&[
-            "-l",
-            self.address().to_string().as_str(),
-            "-D",
-            self.repo_path().to_str().unwrap(),
-        ])
-        .arg("-d")
-        .env_clear()
-        .env("RUST_BACKTRACE", "1")
-        .env(
-            "POSTGRES_DISTRIB_DIR",
-            self.env.pg_distrib_dir.to_str().unwrap(),
-        )
-        .env("ZENITH_REPO_DIR", self.repo_path())
-        .env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postgres-wal-redo binary
-        .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-        .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
-
-        if !cmd.status()?.success() {
-            bail!(
-                "Pageserver failed to start. See '{}' for details.",
-                self.repo_path().join("pageserver.log").display()
-            );
-        }
-
-        // It takes a while for the page server to start up. Wait until it is
-        // open for business.
-        for retries in 1..15 {
-            if self.page_server_psql_client().is_ok() {
-                println!("Pageserver started");
-                return Ok(());
-            }
-            println!("Pageserver not responding yet, retrying ({})...", retries);
-            thread::sleep(Duration::from_secs(1));
-        }
-
-        // Every attempt failed: do not pretend the server is up.
-        bail!(
-            "Pageserver did not respond after startup. See '{}' for details.",
-            self.repo_path().join("pageserver.log").display()
-        );
-    }
-
-    /// Send SIGTERM to the pid recorded in the pid file, then poll the listen
-    /// address up to five times (one second apart) until connecting fails.
-    ///
-    /// Fails if the pid file cannot be read, the signal cannot be delivered,
-    /// or the address still accepts connections after the last attempt.
-    pub fn stop(&self) -> Result<()> {
-        let pid = read_pidfile(&self.pid_file())?;
-        let pid = Pid::from_raw(pid);
-        if kill(pid, Signal::SIGTERM).is_err() {
-            bail!("Failed to kill pageserver with pid {}", pid);
-        }
-
-        // wait for pageserver stop
-        for _ in 0..5 {
-            // The connection attempt is made before the one-second sleep and
-            // its result is inspected afterwards.
-            let stream = TcpStream::connect(self.address());
-            thread::sleep(Duration::from_secs(1));
-            if let Err(_e) = stream {
-                println!("Pageserver stopped");
-                return Ok(());
-            }
-            println!("Stopping pageserver on {}", self.address());
-        }
-
-        bail!("Failed to stop pageserver with pid {}", pid);
-    }
-
-    /// Run `sql` on the pageserver as a simple query over a fresh connection
-    /// and return the raw messages. Panics if connecting or the query fails.
-    pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
-        let host = self.address().ip();
-        let port = self.address().port();
-        let connstring = format!(
-            "host={} port={} dbname={} user={}",
-            host, port, "no_db", "no_user",
-        );
-
-        let mut conn = Client::connect(&connstring, NoTls).unwrap();
-        println!("Pageserver query: '{}'", sql);
-        let messages = conn.simple_query(sql).unwrap();
-        messages
-    }
-
-    /// Open a new unencrypted (`NoTls`) connection to the pageserver at
-    /// `address()`, using the placeholder database `no_db` and user `no_user`.
-    pub fn page_server_psql_client(&self) -> Result<postgres::Client, postgres::Error> {
-        let connstring = format!(
-            "host={} port={} dbname={} user={}",
-            self.address().ip(),
-            self.address().port(),
-            "no_db",
-            "no_user",
-        );
-        Client::connect(connstring.as_str(), NoTls)
-    }
-
-    /// Ask the pageserver for all branches (`branch_list`). The first column
-    /// of the first response message is expected to hold a JSON array.
-    pub fn branches_list(&self) -> Result<Vec<BranchInfo>> {
-        let mut client = self.page_server_psql_client()?;
-        let messages = client.simple_query("branch_list")?;
-
-        let first_cell = match messages.first() {
-            Some(postgres::SimpleQueryMessage::Row(row)) => row.get(0),
-            _ => None,
-        };
-        let branches_json = first_cell.ok_or_else(|| anyhow!("missing branches"))?;
-
-        let branches: Vec<BranchInfo> = serde_json::from_str(branches_json)?;
-        Ok(branches)
-    }
-
-    /// Create branch `name` at `startpoint` on the pageserver
-    /// (`branch_create <name> <startpoint>`) and return the branch it reports.
-    /// The first column of the first response message is expected to hold JSON.
-    pub fn branch_create(&self, name: &str, startpoint: &str) -> Result<BranchInfo> {
-        let mut client = self.page_server_psql_client()?;
-        let command = format!("branch_create {} {}", name, startpoint);
-        let messages = client.simple_query(command.as_str())?;
-
-        let first_cell = match messages.first() {
-            Some(postgres::SimpleQueryMessage::Row(row)) => row.get(0),
-            _ => None,
-        };
-        let branch_json = first_cell.ok_or_else(|| anyhow!("missing branch"))?;
-
-        match serde_json::from_str::<BranchInfo>(branch_json) {
-            Ok(branch) => Ok(branch),
-            Err(e) => Err(anyhow!(
-                "failed to parse branch_create response: {}: {}",
-                branch_json,
-                e
-            )),
-        }
-    }
-
-    // TODO: make this a separate request type and avoid loading all the branches
-    /// Look up a branch by name among all branches reported by the pageserver.
-    ///
-    /// Should several branches carry the same name, the last one listed is
-    /// returned. Fails with "Branch <name> not found" when there is none.
-    pub fn branch_get_by_name(&self, name: &str) -> Result<BranchInfo> {
-        // Search from the back so the last duplicate wins, without building
-        // an intermediate map or cloning every branch name.
-        self.branches_list()?
-            .into_iter()
-            .rfind(|branch_info| branch_info.name == name)
-            .ok_or_else(|| anyhow!("Branch {} not found", name))
-    }
-
-    /// Ask the pageserver for the system identifier (`identify_system`) and
-    /// parse the first column of the first response message as a `u64`.
-    pub fn system_id_get(&self) -> Result<u64> {
-        let mut client = self.page_server_psql_client()?;
-        let messages = client.simple_query("identify_system")?;
-
-        let first_cell = match messages.first() {
-            Some(postgres::SimpleQueryMessage::Row(row)) => row.get(0),
-            _ => None,
-        };
-        let system_id = first_cell
-            .ok_or_else(|| anyhow!("failed to get system_id"))?
-            .parse::<u64>()?;
-
-        Ok(system_id)
-    }
-}
-
-impl Drop for PageServerNode {
-    // Stop the pageserver when the handle goes away, but only if requested
-    // through `kill_on_exit`.
-    fn drop(&mut self) {
-        if self.kill_on_exit {
-            // Best effort: a failure to stop is ignored.
-            let _ = self.stop();
-        }
-    }
-}
--- a/integration_tests/.gitignore
+++ b/integration_tests/.gitignore
@@ -1 +0,0 @@
-tmp_check/
--- a/integration_tests/Cargo.toml
+++ b/integration_tests/Cargo.toml
@@ -9,10 +9,8 @@ edition = "2018"
 [dependencies]
 lazy_static = "1.4.0"
 rand = "0.8.3"
-anyhow = "1.0"
-nix = "0.20"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
+tokio-postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }

 pageserver = { path = "../pageserver" }
 walkeeper = { path = "../walkeeper" }
-control_plane = { path = "../control_plane" }
--- a/integration_tests/src/lib.rs
+++ b/integration_tests/src/lib.rs
@@ -1,416 +0,0 @@
-use std::collections::BTreeMap;
-use std::convert::TryInto;
-use std::fs::{self, File, OpenOptions};
-use std::io::Read;
-use std::net::SocketAddr;
-use std::path::{Path, PathBuf};
-use std::process::{Command, ExitStatus};
-use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::Arc;
-
-use anyhow::{bail, Result};
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
-use postgres;
-
-use control_plane::compute::PostgresNode;
-use control_plane::read_pidfile;
-use control_plane::{local_env::LocalEnv, storage::PageServerNode};
-
-// Find the directory where the binaries were put (i.e. target/debug/)
-fn cargo_bin_dir() -> PathBuf {
-    let mut pathbuf = std::env::current_exe().unwrap();
-
-    pathbuf.pop();
-    if pathbuf.ends_with("deps") {
-        pathbuf.pop();
-    }
-
-    pathbuf
-}
-
-// local compute env for tests
-pub fn create_test_env(testname: &str) -> LocalEnv {
-    let base_path = Path::new(env!("CARGO_MANIFEST_DIR"))
-        .join("../tmp_check/")
-        .join(testname);
-
-    let base_path_str = base_path.to_str().unwrap();
-
-    // Remove remnants of old test repo
-    let _ = fs::remove_dir_all(&base_path);
-
-    fs::create_dir_all(&base_path)
-        .expect(format!("could not create directory for {}", base_path_str).as_str());
-
-    let pgdatadirs_path = base_path.join("pgdatadirs");
-    fs::create_dir(&pgdatadirs_path)
-        .expect(format!("could not create directory {:?}", pgdatadirs_path).as_str());
-
-    LocalEnv {
-        pageserver_connstring: "postgresql://127.0.0.1:64000".to_string(),
-        pg_distrib_dir: Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install"),
-        zenith_distrib_dir: Some(cargo_bin_dir()),
-        base_data_dir: base_path,
-        remotes: BTreeMap::default(),
-    }
-}
-
-//
-// Collection of several example deployments useful for tests.
-//
-// I'm intendedly modelling storage and compute control planes as a separate entities
-// as it is closer to the actual setup.
-//
-pub struct TestStorageControlPlane {
-    pub wal_acceptors: Vec<WalAcceptorNode>,
-    pub pageserver: Arc<PageServerNode>,
-    pub test_done: AtomicBool,
-}
-
-impl TestStorageControlPlane {
-    // postgres <-> page_server
-    //
-    // Initialize a new repository and configure a page server to run in it
-    //
-    pub fn one_page_server(local_env: &LocalEnv) -> TestStorageControlPlane {
-        let pserver = Arc::new(PageServerNode {
-            env: local_env.clone(),
-            kill_on_exit: true,
-            listen_address: None,
-        });
-        pserver.init().unwrap();
-        pserver.start().unwrap();
-
-        TestStorageControlPlane {
-            wal_acceptors: Vec::new(),
-            pageserver: pserver,
-            test_done: AtomicBool::new(false),
-        }
-    }
-
-    // postgres <-> {wal_acceptor1, wal_acceptor2, ...}
-    pub fn fault_tolerant(local_env: &LocalEnv, redundancy: usize) -> TestStorageControlPlane {
-        let mut cplane = TestStorageControlPlane {
-            wal_acceptors: Vec::new(),
-            pageserver: Arc::new(PageServerNode {
-                env: local_env.clone(),
-                kill_on_exit: true,
-                listen_address: None,
-            }),
-            test_done: AtomicBool::new(false),
-            // repopath,
-        };
-        cplane.pageserver.init().unwrap();
-        cplane.pageserver.start().unwrap();
-
-        let systemid = cplane.pageserver.system_id_get().unwrap();
-
-        const WAL_ACCEPTOR_PORT: usize = 54321;
-
-        let datadir_base = local_env.base_data_dir.join("safekeepers");
-        fs::create_dir_all(&datadir_base).unwrap();
-
-        for i in 0..redundancy {
-            let wal_acceptor = WalAcceptorNode {
-                listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
-                    .parse()
-                    .unwrap(),
-                data_dir: datadir_base.join(format!("wal_acceptor_{}", i)),
-                systemid,
-                env: local_env.clone(),
-                pass_to_pageserver: true,
-            };
-            wal_acceptor.init();
-            wal_acceptor.start();
-            cplane.wal_acceptors.push(wal_acceptor);
-        }
-        cplane
-    }
-
-    pub fn stop(&self) {
-        for wa in self.wal_acceptors.iter() {
-            let _ = wa.stop();
-        }
-        self.test_done.store(true, Ordering::Relaxed);
-    }
-
-    pub fn get_wal_acceptor_conn_info(&self) -> String {
-        self.wal_acceptors
-            .iter()
-            .map(|wa| wa.listen.to_string())
-            .collect::<Vec<String>>()
-            .join(",")
-    }
-
-    pub fn is_running(&self) -> bool {
-        self.test_done.load(Ordering::Relaxed)
-    }
-}
-
-impl Drop for TestStorageControlPlane {
-    fn drop(&mut self) {
-        self.stop();
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// PostgresNodeExt
-//
-///////////////////////////////////////////////////////////////////////////////
-
-///
-/// Testing utilities for PostgresNode type
-///
-pub trait PostgresNodeExt {
-    fn pg_regress(&self) -> ExitStatus;
-    fn pg_bench(&self, clients: u32, seconds: u32) -> ExitStatus;
-    fn start_proxy(&self, wal_acceptors: &str) -> WalProposerNode;
-    fn open_psql(&self, db: &str) -> postgres::Client;
-    fn dump_log_file(&self);
-    fn safe_psql(&self, db: &str, sql: &str) -> Vec<postgres::Row>;
-}
-
-impl PostgresNodeExt for PostgresNode {
-    fn pg_regress(&self) -> ExitStatus {
-        self.safe_psql("postgres", "CREATE DATABASE regression");
-
-        let regress_run_path = self.env.base_data_dir.join("regress");
-        fs::create_dir_all(&regress_run_path).unwrap();
-        fs::create_dir_all(regress_run_path.join("testtablespace")).unwrap();
-        std::env::set_current_dir(regress_run_path).unwrap();
-
-        let regress_build_path =
-            Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install/build/src/test/regress");
-        let regress_src_path =
-            Path::new(env!("CARGO_MANIFEST_DIR")).join("../vendor/postgres/src/test/regress");
-
-        let regress_check = Command::new(regress_build_path.join("pg_regress"))
-            .args(&[
-                "--bindir=''",
-                "--use-existing",
-                format!("--bindir={}", self.env.pg_bin_dir().to_str().unwrap()).as_str(),
-                format!("--dlpath={}", regress_build_path.to_str().unwrap()).as_str(),
-                format!(
-                    "--schedule={}",
-                    regress_src_path.join("parallel_schedule").to_str().unwrap()
-                )
-                .as_str(),
-                format!("--inputdir={}", regress_src_path.to_str().unwrap()).as_str(),
-            ])
-            .env_clear()
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("PGPORT", self.address.port().to_string())
-            .env("PGUSER", self.whoami())
-            .env("PGHOST", self.address.ip().to_string())
-            .status()
-            .expect("pg_regress failed");
-        if !regress_check.success() {
-            if let Ok(mut file) = File::open("regression.diffs") {
-                let mut buffer = String::new();
-                file.read_to_string(&mut buffer).unwrap();
-                println!("--------------- regression.diffs:\n{}", buffer);
-            }
-            self.dump_log_file();
-        }
-        regress_check
-    }
-
-    fn pg_bench(&self, clients: u32, seconds: u32) -> ExitStatus {
-        let port = self.address.port().to_string();
-        let clients = clients.to_string();
-        let seconds = seconds.to_string();
-        let _pg_bench_init = Command::new(self.env.pg_bin_dir().join("pgbench"))
-            .args(&["-i", "-p", port.as_str(), "postgres"])
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .status()
-            .expect("pgbench -i");
-        let pg_bench_run = Command::new(self.env.pg_bin_dir().join("pgbench"))
-            .args(&[
-                "-p",
-                port.as_str(),
-                "-T",
-                seconds.as_str(),
-                "-P",
-                "1",
-                "-c",
-                clients.as_str(),
-                "-M",
-                "prepared",
-                "postgres",
-            ])
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .status()
-            .expect("pgbench run");
-        pg_bench_run
-    }
-
-    fn start_proxy(&self, wal_acceptors: &str) -> WalProposerNode {
-        let proxy_path = self.env.pg_bin_dir().join("safekeeper_proxy");
-        match Command::new(proxy_path.as_path())
-            .args(&["--ztimelineid", &self.timelineid.to_string()])
-            .args(&["-s", wal_acceptors])
-            .args(&["-h", &self.address.ip().to_string()])
-            .args(&["-p", &self.address.port().to_string()])
-            .arg("-v")
-            .stderr(
-                OpenOptions::new()
-                    .create(true)
-                    .append(true)
-                    .open(self.pgdata().join("safekeeper_proxy.log"))
-                    .unwrap(),
-            )
-            .spawn()
-        {
-            Ok(child) => WalProposerNode { pid: child.id() },
-            Err(e) => panic!("Failed to launch {:?}: {}", proxy_path, e),
-        }
-    }
-
-    fn dump_log_file(&self) {
-        if let Ok(mut file) = File::open(self.env.pageserver_data_dir().join("pageserver.log")) {
-            let mut buffer = String::new();
-            file.read_to_string(&mut buffer).unwrap();
-            println!("--------------- pageserver.log:\n{}", buffer);
-        }
-    }
-
-    fn safe_psql(&self, db: &str, sql: &str) -> Vec<postgres::Row> {
-        let connstring = format!(
-            "host={} port={} dbname={} user={}",
-            self.address.ip(),
-            self.address.port(),
-            db,
-            self.whoami()
-        );
-        let mut client = postgres::Client::connect(connstring.as_str(), postgres::NoTls).unwrap();
-
-        println!("Running {}", sql);
-        let result = client.query(sql, &[]);
-        if result.is_err() {
-            self.dump_log_file();
-        }
-        result.unwrap()
-    }
-
-    fn open_psql(&self, db: &str) -> postgres::Client {
-        let connstring = format!(
-            "host={} port={} dbname={} user={}",
-            self.address.ip(),
-            self.address.port(),
-            db,
-            self.whoami()
-        );
-        postgres::Client::connect(connstring.as_str(), postgres::NoTls).unwrap()
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// WalAcceptorNode
-//
-///////////////////////////////////////////////////////////////////////////////
-
-//
-// Control routines for WalAcceptor.
-//
-// Now used only in test setups.
-//
-pub struct WalAcceptorNode {
-    listen: SocketAddr,
-    data_dir: PathBuf,
-    systemid: u64,
-    env: LocalEnv,
-    pass_to_pageserver: bool,
-}
-
-impl WalAcceptorNode {
-    pub fn init(&self) {
-        if self.data_dir.exists() {
-            fs::remove_dir_all(self.data_dir.clone()).unwrap();
-        }
-        fs::create_dir_all(self.data_dir.clone()).unwrap();
-    }
-
-    pub fn start(&self) {
-        println!(
-            "Starting wal_acceptor in {} listening '{}'",
-            self.data_dir.to_str().unwrap(),
-            self.listen
-        );
-
-        let ps_arg = if self.pass_to_pageserver {
-            // Tell page server it can receive WAL from this WAL safekeeper
-            ["--pageserver", "127.0.0.1:64000"].to_vec()
-        } else {
-            [].to_vec()
-        };
-
-        let status = Command::new(
-            self.env
-                .zenith_distrib_dir
-                .as_ref()
-                .unwrap()
-                .join("wal_acceptor"),
-        )
-        .args(&["-D", self.data_dir.to_str().unwrap()])
-        .args(&["-l", self.listen.to_string().as_str()])
-        .args(&["--systemid", self.systemid.to_string().as_str()])
-        .args(&ps_arg)
-        .arg("-d")
-        .arg("-n")
-        .status()
-        .expect("failed to start wal_acceptor");
-
-        if !status.success() {
-            panic!("wal_acceptor start failed");
-        }
-    }
-
-    pub fn stop(&self) -> Result<()> {
-        println!("Stopping wal acceptor on {}", self.listen);
-        let pidfile = self.data_dir.join("wal_acceptor.pid");
-        let pid = read_pidfile(&pidfile)?;
-        let pid = Pid::from_raw(pid);
-        if kill(pid, Signal::SIGTERM).is_err() {
-            bail!("Failed to kill wal_acceptor with pid {}", pid);
-        }
-        Ok(())
-    }
-}
-
-impl Drop for WalAcceptorNode {
-    fn drop(&mut self) {
-        // Ignore errors.
-        let _ = self.stop();
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// WalProposerNode
-//
-///////////////////////////////////////////////////////////////////////////////
-
-pub struct WalProposerNode {
-    pub pid: u32,
-}
-
-impl WalProposerNode {
-    pub fn stop(&self) {
-        // std::process::Child::id() returns u32, we need i32.
-        let pid: i32 = self.pid.try_into().unwrap();
-        let pid = Pid::from_raw(pid);
-        kill(pid, Signal::SIGTERM).expect("failed to execute kill");
-    }
-}
-
-impl Drop for WalProposerNode {
-    fn drop(&mut self) {
-        self.stop();
-    }
-}
--- a/integration_tests/tests/control_plane/mod.rs
+++ b/integration_tests/tests/control_plane/mod.rs
@@ -0,0 +1,844 @@
+//
+// Local control plane.
+//
+// Can start, configure and stop postgres instances running as local processes.
+//
+// Intended to be used in integration tests and in CLI tools for
+// local installations.
+//
+
+use std::fs::File;
+use std::fs::{self, OpenOptions};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::str;
+use std::sync::Arc;
+use std::{
+    io::Write,
+    net::{IpAddr, Ipv4Addr, SocketAddr},
+};
+
+use lazy_static::lazy_static;
+use postgres::{Client, NoTls};
+
+use postgres;
+
+lazy_static! {
+    // postgres would be there if it was built by 'make postgres' here in the repo
+    pub static ref PG_BIN_DIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("../tmp_install/bin");
+    pub static ref PG_LIB_DIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("../tmp_install/lib");
+
+    pub static ref BIN_DIR : PathBuf = cargo_bin_dir();
+
+    pub static ref TEST_WORKDIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("tmp_check");
+}
+
+// Find the directory where the binaries were put (i.e. target/debug/), minus a trailing deps/.
+pub fn cargo_bin_dir() -> PathBuf {
+    // unwrap() the Result directly so a failure reports the underlying io::Error
+    let mut pathbuf = std::env::current_exe().unwrap();
+    pathbuf.pop();
+    if pathbuf.ends_with("deps") {
+        pathbuf.pop();
+    }
+
+    pathbuf
+}
+
+//
+// I'm intentionally modelling storage and compute control planes as separate entities,
+// as that is closer to the actual setup.
+//
+pub struct StorageControlPlane {
+    pub wal_acceptors: Vec<WalAcceptorNode>,
+    pub page_servers: Vec<PageServerNode>,
+}
+
+impl StorageControlPlane {
+    // postgres <-> page_server
+    pub fn one_page_server(froms3: bool) -> StorageControlPlane {
+        let mut cplane = StorageControlPlane {
+            wal_acceptors: Vec::new(),
+            page_servers: Vec::new(),
+        };
+
+        let pserver = PageServerNode {
+            page_service_addr: "127.0.0.1:65200".parse().unwrap(),
+            data_dir: TEST_WORKDIR.join("pageserver"),
+        };
+        pserver.init();
+        if froms3 {
+            pserver.start_froms3();
+        } else {
+            pserver.start();
+        }
+
+        cplane.page_servers.push(pserver);
+        cplane
+    }
+
+    pub fn fault_tolerant(redundancy: usize) -> StorageControlPlane {
+        let mut cplane = StorageControlPlane {
+            wal_acceptors: Vec::new(),
+            page_servers: Vec::new(),
+        };
+        const WAL_ACCEPTOR_PORT: usize = 54321;
+
+        for i in 0..redundancy {
+            let wal_acceptor = WalAcceptorNode {
+                listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
+                    .parse()
+                    .unwrap(),
+                data_dir: TEST_WORKDIR.join(format!("wal_acceptor_{}", i)),
+            };
+            wal_acceptor.init();
+            wal_acceptor.start();
+            cplane.wal_acceptors.push(wal_acceptor);
+        }
+        cplane
+    }
+
+    pub fn stop(&self) {
+        for wa in self.wal_acceptors.iter() {
+            wa.stop();
+        }
+    }
+
+    // // postgres <-> wal_acceptor x3 <-> page_server
+    // fn local(&mut self) -> StorageControlPlane {
+    // }
+
+    pub fn page_server_addr(&self) -> &SocketAddr {
+        &self.page_servers[0].page_service_addr
+    }
+
+    // Connection info for all WAL acceptors: each node's `listen` SocketAddr in its
+    // Display form (e.g. 127.0.0.1:54321), comma-separated, in `wal_acceptors` order.
+    // Yields an empty string when there are no acceptors.
+    pub fn get_wal_acceptor_conn_info(&self) -> String {
+        let addrs = self.wal_acceptors.iter().map(|wa| wa.listen.to_string());
+        addrs.collect::<Vec<String>>().join(",")
+    }
+
+    pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
+        let addr = &self.page_servers[0].page_service_addr;
+
+        let connstring = format!(
+            "host={} port={} dbname={} user={}",
+            addr.ip(),
+            addr.port(),
+            "no_db",
+            "no_user",
+        );
+        let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
+
+        println!("Pageserver query: '{}'", sql);
+        client.simple_query(sql).unwrap()
+    }
+}
+
+impl Drop for StorageControlPlane {
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
+
+pub struct PageServerNode {
+    page_service_addr: SocketAddr,
+    data_dir: PathBuf,
+}
+
+impl PageServerNode {
+    // TODO: method to force redo on a specific relation
+
+    // TODO: make wal-redo-postgres workable without data directory?
+    pub fn init(&self) {
+        fs::create_dir_all(self.data_dir.clone()).unwrap();
+
+        let datadir_path = self.data_dir.join("wal_redo_pgdata");
+        fs::remove_dir_all(datadir_path.to_str().unwrap()).ok();
+
+        let initdb = Command::new(PG_BIN_DIR.join("initdb"))
+            .args(&["-D", datadir_path.to_str().unwrap()])
+            .arg("-N")
+            .arg("--no-instructions")
+            .env_clear()
+            .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
+            .status()
+            .expect("failed to execute initdb");
+        if !initdb.success() {
+            panic!("initdb failed");
+        }
+    }
+
+    pub fn start(&self) {
+        println!("Starting pageserver at '{}'", self.page_service_addr);
+        // Run `pageserver -D <data_dir> -l <addr> -d --skip-recovery` with a cleared environment.
+        let status = Command::new(BIN_DIR.join("pageserver"))
+            .args(&["-D", self.data_dir.to_str().unwrap()])
+            .args(&["-l", self.page_service_addr.to_string().as_str()])
+            .arg("-d")
+            .arg("--skip-recovery")
+            .env_clear()
+            .env("PATH", PG_BIN_DIR.to_str().unwrap()) // path to postgres-wal-redo binary
+            .status()
+            .expect("failed to start pageserver");
+        // status() waits for the command to exit; an unsuccessful exit is treated as a failed start.
+        if !status.success() {
+            panic!("pageserver start failed");
+        }
+    }
+
+    pub fn start_froms3(&self) {
+        println!("Starting pageserver at '{}'", self.page_service_addr);
+        // Run `pageserver -D <data_dir> -l <addr> -d` (no --skip-recovery) with only PATH and S3_* set.
+        let status = Command::new(BIN_DIR.join("pageserver"))
+            .args(&["-D", self.data_dir.to_str().unwrap()])
+            .args(&["-l", self.page_service_addr.to_string().as_str()])
+            .arg("-d")
+            .env_clear()
+            .env("PATH", PG_BIN_DIR.to_str().unwrap()) // path to postgres-wal-redo binary
+            .env("S3_ENDPOINT", "https://127.0.0.1:9000")
+            .env("S3_REGION", "us-east-1")
+            .env("S3_ACCESSKEY", "minioadmin")
+            .env("S3_SECRET", "minioadmin")
+            .status()
+            .expect("failed to start pageserver");
+        // status() waits for the command to exit; an unsuccessful exit is treated as a failed start.
+        if !status.success() {
+            panic!("pageserver start failed");
+        }
+    }
+
+    // Stop the pageserver by running `kill` on the pid stored in <data_dir>/pageserver.pid.
+    pub fn stop(&self) {
+        let pidfile = self.data_dir.join("pageserver.pid");
+        // Panics if the pidfile cannot be read; trim() guards against a stray newline/whitespace.
+        let pid = fs::read_to_string(pidfile).unwrap();
+        let status = Command::new("kill")
+            .arg(pid.trim())
+            .env_clear()
+            .status()
+            .expect("failed to execute kill");
+
+        assert!(status.success(), "kill failed");
+    }
+}
+
+impl Drop for PageServerNode {
+    fn drop(&mut self) {
+        self.stop();
+        // fs::remove_dir_all(self.data_dir.clone()).unwrap();
+    }
+}
+
+pub struct WalAcceptorNode {
+    listen: SocketAddr,
+    data_dir: PathBuf,
+}
+
+impl WalAcceptorNode {
+    pub fn init(&self) {
+        if self.data_dir.exists() {
+            fs::remove_dir_all(self.data_dir.clone()).unwrap();
+        }
+        fs::create_dir_all(self.data_dir.clone()).unwrap();
+    }
+
+    pub fn start(&self) {
+        println!(
+            "Starting wal_acceptor in {} listening '{}'",
+            self.data_dir.to_str().unwrap(),
+            self.listen
+        );
+
+        let status = Command::new(BIN_DIR.join("wal_acceptor"))
+            .args(&["-D", self.data_dir.to_str().unwrap()])
+            .args(&["-l", self.listen.to_string().as_str()])
+            .arg("-d")
+            .arg("-n")
+            .status()
+            .expect("failed to start wal_acceptor");
+
+        if !status.success() {
+            panic!("wal_acceptor start failed");
+        }
+    }
+
+    pub fn stop(&self) {
+        let pidfile = self.data_dir.join("wal_acceptor.pid");
+        if let Ok(pid) = fs::read_to_string(pidfile) {
+            let _status = Command::new("kill")
+                .arg(pid)
+                .env_clear()
+                .status()
+                .expect("failed to execute kill");
+        }
+    }
+}
+
+impl Drop for WalAcceptorNode {
+    fn drop(&mut self) {
+        self.stop();
+        // fs::remove_dir_all(self.data_dir.clone()).unwrap();
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+//
+// ComputeControlPlane
+//
+pub struct ComputeControlPlane<'a> {
+    pg_bin_dir: PathBuf,
+    work_dir: PathBuf,
+    last_assigned_port: u16,
+    storage_cplane: &'a StorageControlPlane,
+    nodes: Vec<Arc<PostgresNode>>,
+}
+
+impl ComputeControlPlane<'_> {
+    pub fn local(storage_cplane: &StorageControlPlane) -> ComputeControlPlane {
+        ComputeControlPlane {
+            pg_bin_dir: PG_BIN_DIR.to_path_buf(),
+            work_dir: TEST_WORKDIR.to_path_buf(),
+            last_assigned_port: 65431,
+            storage_cplane: storage_cplane,
+            nodes: Vec::new(),
+        }
+    }
+
+    // Hand out the next port: bump `last_assigned_port` by one and return the new value.
+    // TODO: check port availability.
+    fn get_port(&mut self) -> u16 {
+        self.last_assigned_port += 1;
+        self.last_assigned_port
+    }
+
+    pub fn new_vanilla_node<'a>(&mut self) -> &Arc<PostgresNode> {
+        // allocate new node entry with generated port
+        let node_id = self.nodes.len() + 1;
+        let node = PostgresNode {
+            _node_id: node_id,
+            port: self.get_port(),
+            ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
+            pgdata: self.work_dir.join(format!("compute/pg{}", node_id)),
+            pg_bin_dir: self.pg_bin_dir.clone(),
+        };
+        self.nodes.push(Arc::new(node));
+        let node = self.nodes.last().unwrap();
+
+        // initialize data directory
+        fs::remove_dir_all(node.pgdata.to_str().unwrap()).ok();
+        let initdb_path = self.pg_bin_dir.join("initdb");
+        println!("initdb_path: {}", initdb_path.to_str().unwrap());
+        let initdb = Command::new(initdb_path)
+            .args(&["-D", node.pgdata.to_str().unwrap()])
+            .arg("-N")
+            .arg("--no-instructions")
+            .env_clear()
+            .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
+            .status()
+            .expect("failed to execute initdb");
+
+        if !initdb.success() {
+            panic!("initdb failed");
+        }
+
+        // // allow local replication connections
+        // node.append_conf("pg_hba.conf", format!("\
+        //     host replication all {}/32 sspi include_realm=1 map=regress\n\
+        // ", node.ip).as_str());
+
+        // listen for selected port
+        node.append_conf(
+            "postgresql.conf",
+            format!(
+                "\
+            max_wal_senders = 10\n\
+            max_replication_slots = 10\n\
+            hot_standby = on\n\
+            shared_buffers = 1MB\n\
+            max_connections = 100\n\
+            wal_level = replica\n\
+            listen_addresses = '{address}'\n\
+            port = {port}\n\
+        ",
+                address = node.ip,
+                port = node.port
+            )
+            .as_str(),
+        );
+
+        node
+    }
+
+    // Init compute node without files, only datadir structure
+    // use initdb --compute-node flag and GUC 'computenode_mode'
+    // to distinguish the node
+    pub fn new_minimal_node<'a>(&mut self) -> &Arc<PostgresNode> {
+        // allocate new node entry with generated port
+        let node_id = self.nodes.len() + 1;
+        let node = PostgresNode {
+            _node_id: node_id,
+            port: self.get_port(),
+            ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
+            pgdata: self.work_dir.join(format!("compute/pg{}", node_id)),
+            pg_bin_dir: self.pg_bin_dir.clone(),
+        };
+        self.nodes.push(Arc::new(node));
+        let node = self.nodes.last().unwrap();
+
+        // initialize data directory
+        fs::remove_dir_all(node.pgdata.to_str().unwrap()).ok();
+        let initdb_path = self.pg_bin_dir.join("initdb");
+        println!("initdb_path: {}", initdb_path.to_str().unwrap());
+        let initdb = Command::new(initdb_path)
+            .args(&["-D", node.pgdata.to_str().unwrap()])
+            .arg("-N")
+            .arg("--no-instructions")
+            .arg("--compute-node")
+            .env_clear()
+            .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
+            .status()
+            .expect("failed to execute initdb");
+
+        if !initdb.success() {
+            panic!("initdb failed");
+        }
+
+        // // allow local replication connections
+        // node.append_conf("pg_hba.conf", format!("\
+        //     host replication all {}/32 sspi include_realm=1 map=regress\n\
+        // ", node.ip).as_str());
+
+        // listen for selected port
+        node.append_conf(
+            "postgresql.conf",
+            format!(
+                "\
+            max_wal_senders = 10\n\
+            max_replication_slots = 10\n\
+            hot_standby = on\n\
+            shared_buffers = 1MB\n\
+            max_connections = 100\n\
+            wal_level = replica\n\
+            listen_addresses = '{address}'\n\
+            port = {port}\n\
+            computenode_mode = true\n\
+            ",
+                address = node.ip,
+                port = node.port
+            )
+            .as_str(),
+        );
+        node
+    }
+
+    pub fn new_node_wo_data(&mut self) -> Arc<PostgresNode> {
+        let storage_cplane = self.storage_cplane;
+        let node = self.new_minimal_node();
+
+        let pserver = storage_cplane.page_server_addr();
+
+        // Configure that node to take pages from pageserver
+        node.append_conf(
+            "postgresql.conf",
+            format!(
+                "\
+            page_server_connstring = 'host={} port={}'\n\
+        ",
+                pserver.ip(),
+                pserver.port()
+            )
+            .as_str(),
+        );
+
+        node.clone()
+    }
+
+    pub fn new_node(&mut self) -> Arc<PostgresNode> {
+        let storage_cplane = self.storage_cplane;
+        let node = self.new_vanilla_node();
+
+        let pserver = storage_cplane.page_server_addr();
+
+        // Configure that node to take pages from pageserver
+        node.append_conf(
+            "postgresql.conf",
+            format!(
+                "\
+            page_server_connstring = 'host={} port={}'\n\
+        ",
+                pserver.ip(),
+                pserver.port()
+            )
+            .as_str(),
+        );
+
+        node.clone()
+    }
+
+    pub fn new_master_node(&mut self) -> Arc<PostgresNode> {
+        let node = self.new_vanilla_node();
+
+        node.append_conf(
+            "postgresql.conf",
+            "synchronous_standby_names = 'safekeeper_proxy'\n\
+						 ",
+        );
+        node.clone()
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+pub struct WalProposerNode {
+    pid: u32,
+}
+
+impl WalProposerNode {
+    // Stop the process with this node's pid by running `kill <pid>`.
+    // Panics if `kill` cannot be executed or exits unsuccessfully.
+    pub fn stop(&self) {
+        let status = Command::new("kill")
+            .arg(self.pid.to_string())
+            .env_clear()
+            .status()
+            .expect("failed to execute kill");
+
+        assert!(status.success(), "kill failed");
+    }
+}
+
+impl Drop for WalProposerNode {
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+pub struct PostgresNode {
+    _node_id: usize,
+    pub port: u16,
+    pub ip: IpAddr,
+    pgdata: PathBuf,
+    pg_bin_dir: PathBuf,
+}
+
+impl PostgresNode {
+    // Append `opts` verbatim to the file `config` inside pgdata; panics if it cannot be opened.
+    pub fn append_conf(&self, config: &str, opts: &str) {
+        let mut file = OpenOptions::new()
+            .append(true)
+            .open(self.pgdata.join(config))
+            .unwrap();
+        file.write_all(opts.as_bytes()).unwrap();
+    }
+
+    fn pg_ctl(&self, args: &[&str], check_ok: bool) {
+        let pg_ctl_path = self.pg_bin_dir.join("pg_ctl");
+        let pg_ctl = Command::new(pg_ctl_path)
+            .args(
+                [
+                    &[
+                        "-D",
+                        self.pgdata.to_str().unwrap(),
+                        "-l",
+                        self.pgdata.join("log").to_str().unwrap(),
+                    ],
+                    args,
+                ]
+                .concat(),
+            )
+            .env_clear()
+            .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
+            .status()
+            .expect("failed to execute pg_ctl");
+
+        if check_ok && !pg_ctl.success() {
+            panic!("pg_ctl failed");
+        }
+    }
+
+    // Send "callmemaybe <connstr>" to the pageserver (if any), then run `pg_ctl start`.
+    pub fn start(&self, storage_cplane: &StorageControlPlane) {
+        if !storage_cplane.page_servers.is_empty() {
+            storage_cplane.page_server_psql(&format!("callmemaybe {}", self.connstr()));
+        }
+        println!("Starting postgres node at '{}'", self.connstr());
+        self.pg_ctl(&["start"], true);
+    }
+
+    pub fn restart(&self) {
+        self.pg_ctl(&["restart"], true);
+    }
+
+    pub fn stop(&self) {
+        self.pg_ctl(&["-m", "immediate", "stop"], true);
+    }
+
+    pub fn connstr(&self) -> String {
+        format!("host={} port={} user={}", self.ip, self.port, self.whoami())
+    }
+
+    // XXX: cache that in control plane
+    /// Returns the output of the external `whoami` command with surrounding
+    /// whitespace trimmed. A new process is spawned on every call.
+    ///
+    /// Panics if the command cannot be executed, exits unsuccessfully, or
+    /// prints output that is not valid UTF-8.
+    pub fn whoami(&self) -> String {
+        let whoami_out = Command::new("whoami")
+            .output()
+            .expect("failed to execute whoami");
+        assert!(whoami_out.status.success(), "whoami failed");
+
+        let raw_name = String::from_utf8(whoami_out.stdout).unwrap();
+        raw_name.trim().to_owned()
+    }
+
+    /// Opens a fresh connection to database `db` on this node, runs `sql`
+    /// without parameters and returns the resulting rows.
+    ///
+    /// The statement is echoed to stdout before it runs. Panics if the
+    /// connection or the query fails.
+    pub fn safe_psql(&self, db: &str, sql: &str) -> Vec<tokio_postgres::Row> {
+        // open_psql() builds the same host/port/dbname/user connection string
+        // and panics on connection failure.
+        let mut conn = self.open_psql(db);
+
+        println!("Running {}", sql);
+        conn.query(sql, &[]).unwrap()
+    }
+
+    /// Connects (without TLS) to database `db` on this node as the user
+    /// returned by `whoami()` and returns the client.
+    ///
+    /// Panics if the connection cannot be established.
+    pub fn open_psql(&self, db: &str) -> Client {
+        let user = self.whoami();
+        let conninfo = format!(
+            "host={} port={} dbname={} user={}",
+            self.ip, self.port, db, user
+        );
+        Client::connect(conninfo.as_str(), NoTls).unwrap()
+    }
+
+    /// Returns the data directory path as `&str`, or `None` when the path is
+    /// not valid UTF-8.
+    pub fn get_pgdata(&self) -> Option<&str> {
+        self.pgdata.to_str()
+    }
+
+    // Request from the pageserver a stub controlfile, the respective xlog
+    // and a bunch of files needed to start a compute node.
+    //
+    // NOTE this "file" request is a crutch.
+    // It asks the pageserver to write the requested page to the provided
+    // filepath and thus only works locally.
+    // TODO receive pages via some libpq protocol.
+    // The problem met so far is that nonrel files are not valid utf8 and
+    // cannot be handled by simple_query(), which expects text,
+    // and regular query() uses prepared queries.
+    //
+    // `sysid` is placed into every request sent to the pageserver.
+    // Panics if the data directory path is not valid UTF-8, if any of the
+    // filesystem operations fail, or if pg_resetwal cannot be run or exits
+    // unsuccessfully.
+    pub fn setup_compute_node(&self, sysid: u64, storage_cplane: &StorageControlPlane) {
+        let pgdata = self.pgdata.to_str().unwrap();
+
+        // Issue one "file" request to the pageserver. The fields after the
+        // target path are: sysid, tablespace, dboid, reloid, forknum, blkno,
+        // lsn. dboid and lsn are 0 for every request made here.
+        let request_file =
+            |relpath: &str, tablespace: u32, reloid: u32, forknum: u32, blkno: u32| {
+                let query = format!(
+                    "file {}/{},{},{},{},{},{},{},{}",
+                    pgdata, relpath, sysid, tablespace, 0, reloid, forknum, blkno, 0
+                );
+                storage_cplane.page_server_psql(query.as_str());
+            };
+
+        //Request pg_control from pageserver (tablespace 1664, forknum 42)
+        request_file("global/pg_control", 1664, 0, 42, 0);
+
+        //Request pg_xact and pg_multixact from pageserver
+        //We need them for initial pageserver startup and authentication
+        //TODO figure out which block number we really need
+        request_file("pg_xact/0000", 0, 0, 44, 0);
+        request_file("pg_multixact/offsets/0000", 0, 0, 45, 0);
+        request_file("pg_multixact/members/0000", 0, 0, 46, 0);
+
+        //Request a few shared catalogs needed for authentication
+        //Without them we cannot setup connection with pageserver to request further pages
+        let shared_catalogs: [u32; 4] = [1260, 1261, 1262, 2396];
+        for &reloid in &shared_catalogs {
+            //FIXME request all blocks from file, not just 10
+            for blkno in 0..10 {
+                // the reloid is also used as the file name under global/
+                request_file(&format!("global/{}", reloid), 1664, reloid, 0, blkno);
+            }
+        }
+
+        fs::create_dir(format!("{}/base/13006", pgdata)).unwrap();
+        fs::create_dir(format!("{}/base/13007", pgdata)).unwrap();
+
+        //FIXME figure out what wal file we need to successfully start
+        // NOTE(review): the copy source below is an absolute path on one
+        // developer's machine, so this step only works there. It should be
+        // derived from the test working directory once it is known which
+        // segment is needed.
+        let wal_dst = format!("{}/pg_wal/000000010000000000000001", pgdata);
+        fs::copy(
+            "/home/anastasia/zenith/zenith/tmp_check/pgdata/pg_wal/000000010000000000000001",
+            wal_dst,
+        )
+        .unwrap();
+
+        println!("before resetwal ");
+
+        // Now it does nothing, just prints existing content of pg_control
+        // ("-n" is a dry run).
+        // TODO update values with most recent lsn, xid, oid requested from
+        // pageserver, e.g. by dropping "-n" and passing "-f",
+        // "--next-transaction-id" and "--next-oid".
+        let mut resetwal = Command::new(self.pg_bin_dir.join("pg_resetwal"));
+        resetwal.args(&["-D", pgdata]).arg("-n");
+
+        let resetwal_status = resetwal.status().expect("failed to execute pg_resetwal");
+        if !resetwal_status.success() {
+            panic!("pg_resetwal failed");
+        }
+
+        println!("setup done");
+    }
+
+    /// Spawns `safekeeper_proxy` (looked up in `PG_BIN_DIR`) pointed at this
+    /// node and at the given `wal_acceptors`, and returns a handle carrying
+    /// the child's pid.
+    ///
+    /// The proxy's stderr is redirected to a log file in `TEST_WORKDIR`.
+    /// Panics if the log file cannot be created or the process cannot be
+    /// spawned.
+    pub fn start_proxy(&self, wal_acceptors: String) -> WalProposerNode {
+        let proxy_path = PG_BIN_DIR.join("safekeeper_proxy");
+        let host = self.ip.to_string();
+        let port = self.port.to_string();
+        let stderr_log = File::create(TEST_WORKDIR.join("safepkeeper_proxy.log")).unwrap();
+
+        let spawned = Command::new(proxy_path.as_path())
+            .args(&["-s", &wal_acceptors])
+            .args(&["-h", &host])
+            .args(&["-p", &port])
+            .arg("-v")
+            .stderr(stderr_log)
+            .spawn();
+
+        let child =
+            spawned.unwrap_or_else(|e| panic!("Failed to launch {:?}: {}", proxy_path, e));
+        WalProposerNode { pid: child.id() }
+    }
+
+    /// Runs `zenith_push -D <pgdata>` from this node's bin directory.
+    ///
+    /// The child's environment is cleared and then populated with hard-coded
+    /// S3 endpoint, region and credentials. `S3_BUCKET` is not set. Panics if
+    /// the tool cannot be executed or exits unsuccessfully.
+    pub fn push_to_s3(&self) {
+        let pgdata = self.pgdata.to_str().unwrap();
+        println!("Push to s3 node  at '{}'", pgdata);
+
+        let zenith_push_path = self.pg_bin_dir.join("zenith_push");
+        println!("zenith_push_path: {}", zenith_push_path.to_str().unwrap());
+
+        let mut push_cmd = Command::new(zenith_push_path);
+        push_cmd
+            .args(&["-D", pgdata])
+            .env_clear()
+            .env("S3_ENDPOINT", "https://127.0.0.1:9000")
+            .env("S3_REGION", "us-east-1")
+            .env("S3_ACCESSKEY", "minioadmin")
+            .env("S3_SECRET", "minioadmin");
+        // .env("S3_BUCKET", "zenith-testbucket")
+
+        let push_status = push_cmd.status().expect("failed to push node to s3");
+        assert!(push_status.success(), "zenith_push failed");
+    }
+
+    // TODO: both functions below are unimplemented placeholders with empty
+    // bodies; they take no `self` and do nothing when called.
+    pub fn pg_bench() {}
+    pub fn pg_regress() {}
+}
+
+impl Drop for PostgresNode {
+    // destructor to clean up state after test is done
+    // XXX: we may detect failed test by setting some flag in catch_unwind()
+    // and checking it here. But let just clean datadirs on start.
+    fn drop(&mut self) {
+        // Best-effort shutdown: run the same `pg_ctl -m immediate stop` as
+        // stop(), but do not panic when pg_ctl exits unsuccessfully. That
+        // happens whenever the node was never started or was already stopped
+        // explicitly, and a panic raised from drop() while a failed test is
+        // already unwinding would abort the whole test process.
+        // pg_ctl() still panics if the binary itself cannot be executed.
+        self.pg_ctl(&["-m", "immediate", "stop"], false);
+        // fs::remove_dir_all(self.pgdata.clone()).unwrap();
+    }
+}
+
+/// Runs `pg_regress` with the upstream `parallel_schedule` against the
+/// already-running node `pg`.
+///
+/// Creates the `regression` database on the node, creates
+/// `tmp_check/regress` under the crate directory and makes it the current
+/// directory of the whole process, then launches `pg_regress` with
+/// `--use-existing`. The exit status of `pg_regress` is not checked; only a
+/// failure to launch it panics.
+pub fn regress_check(pg: &PostgresNode) {
+    pg.safe_psql("postgres", "CREATE DATABASE regression");
+
+    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+
+    let run_dir = manifest_dir.join("tmp_check/regress");
+    fs::create_dir_all(run_dir.clone()).unwrap();
+    std::env::set_current_dir(run_dir).unwrap();
+
+    let build_dir = manifest_dir.join("../tmp_install/build/src/test/regress");
+    let src_dir = manifest_dir.join("../vendor/postgres/src/test/regress");
+
+    let bindir_arg = format!("--bindir={}", PG_BIN_DIR.to_str().unwrap());
+    let dlpath_arg = format!("--dlpath={}", build_dir.to_str().unwrap());
+    let schedule_arg = format!(
+        "--schedule={}",
+        src_dir.join("parallel_schedule").to_str().unwrap()
+    );
+    let inputdir_arg = format!("--inputdir={}", src_dir.to_str().unwrap());
+
+    let _regress_check = Command::new(build_dir.join("pg_regress"))
+        .arg("--bindir=''")
+        .arg("--use-existing")
+        .arg(bindir_arg.as_str())
+        .arg(dlpath_arg.as_str())
+        .arg(schedule_arg.as_str())
+        .arg(inputdir_arg.as_str())
+        .env_clear()
+        .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
+        .env("PGPORT", pg.port.to_string())
+        .env("PGUSER", pg.whoami())
+        .env("PGHOST", pg.ip.to_string())
+        .status()
+        .expect("pg_regress failed");
+}
--- a/integration_tests/tests/test_compute.rs
+++ b/integration_tests/tests/test_compute.rs
@@ -0,0 +1,7 @@
+// test node resettlement to an empty datadir
+// (placeholder: the body is empty, so this test currently always passes)
+#[test]
+fn test_resettlement() {}
+
+// test seq scan of everything after restart
+// (placeholder: the body is empty, so this test currently always passes)
+#[test]
+fn test_cold_seqscan() {}
--- a/integration_tests/tests/test_control_plane.rs
+++ b/integration_tests/tests/test_control_plane.rs
@@ -0,0 +1,5 @@
+// Placeholder: the body is empty, so this test currently always passes.
+#[test]
+fn test_actions() {}
+
+// Placeholder: the body is empty, so this test currently always passes.
+#[test]
+fn test_regress() {}
--- a/integration_tests/tests/test_pageserver.rs
+++ b/integration_tests/tests/test_pageserver.rs
@@ -0,0 +1,210 @@
+#[allow(dead_code)]
+mod control_plane;
+use std::thread::sleep;
+use std::time::Duration;
+
+use control_plane::ComputeControlPlane;
+use control_plane::StorageControlPlane;
+
+// XXX: force all redo at the end
+// -- restart + seqscan won't read deleted stuff
+// -- pageserver api endpoint to check all rels
+
+//Handcrafted cases with wal records that are (were) problematic for redo.
+//
+// Starts one pageserver and one postgres node, then checks a plain
+// CREATE TABLE + INSERT and a CREATE TABLE AS copy by summing the keys.
+// Marked #[ignore], so it only runs when requested explicitly.
+#[test]
+#[ignore]
+fn test_redo_cases() {
+    // Start pageserver that reads WAL directly from that postgres
+    let storage_cplane = StorageControlPlane::one_page_server(false);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
+
+    // start postgres
+    let node = compute_cplane.new_node();
+    node.start(&storage_cplane);
+
+    println!("await pageserver connection...");
+    sleep(Duration::from_secs(3));
+
+    // check basic work with table
+    node.safe_psql(
+        "postgres",
+        "CREATE TABLE t(key int primary key, value text)",
+    );
+    node.safe_psql(
+        "postgres",
+        "INSERT INTO t SELECT generate_series(1,100), 'payload'",
+    );
+    let count: i64 = node
+        .safe_psql("postgres", "SELECT sum(key) FROM t")
+        .first()
+        .unwrap()
+        .get(0);
+    println!("sum = {}", count);
+    assert_eq!(count, 5050);
+
+    //check 'create table as'
+    node.safe_psql("postgres", "CREATE TABLE t2 AS SELECT * FROM t");
+    // Read back from the copy (t2). Querying t again, as before, would pass
+    // even if CREATE TABLE AS had produced a broken table.
+    let count: i64 = node
+        .safe_psql("postgres", "SELECT sum(key) FROM t2")
+        .first()
+        .unwrap()
+        .get(0);
+    println!("sum = {}", count);
+    assert_eq!(count, 5050);
+}
+
+// Runs pg_regress on a compute node
+//
+// Marked #[ignore], so it only runs when requested explicitly.
+// NOTE(review): control_plane::regress_check() does not inspect the exit
+// status of pg_regress, so this test only fails if a step panics.
+#[test]
+#[ignore]
+fn test_regress() {
+    // Start pageserver that reads WAL directly from that postgres
+    let storage_cplane = StorageControlPlane::one_page_server(false);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
+
+    // start postgres
+    let node = compute_cplane.new_node();
+    node.start(&storage_cplane);
+
+    // Fixed delay rather than a readiness check.
+    println!("await pageserver connection...");
+    sleep(Duration::from_secs(3));
+
+    control_plane::regress_check(&node);
+}
+
+// Run two postgres instances on one pageserver
+//
+// Each node gets its own table `t` with a different key range; the sums are
+// checked per node. Marked #[ignore], so it only runs on explicit request.
+#[test]
+#[ignore]
+fn test_pageserver_multitenancy() {
+    // Start pageserver that reads WAL directly from that postgres
+    let storage_cplane = StorageControlPlane::one_page_server(false);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
+
+    // Allocate both postgres instances first, then start them
+    let node1 = compute_cplane.new_node();
+    let node2 = compute_cplane.new_node();
+    node1.start(&storage_cplane);
+    node2.start(&storage_cplane);
+
+    // XXX: add some extension func to postgres to check walsender conn
+    // XXX: or better just drop that
+    println!("await pageserver connection...");
+    sleep(Duration::from_secs(3));
+
+    // check node1: keys 1..=100 sum to 5050
+    node1.safe_psql(
+        "postgres",
+        "CREATE TABLE t(key int primary key, value text)",
+    );
+    node1.safe_psql(
+        "postgres",
+        "INSERT INTO t SELECT generate_series(1,100), 'payload'",
+    );
+    let rows1 = node1.safe_psql("postgres", "SELECT sum(key) FROM t");
+    let sum1: i64 = rows1.first().unwrap().get(0);
+    println!("sum = {}", sum1);
+    assert_eq!(sum1, 5050);
+
+    // check node2: keys 100..=200 sum to 15150
+    node2.safe_psql(
+        "postgres",
+        "CREATE TABLE t(key int primary key, value text)",
+    );
+    node2.safe_psql(
+        "postgres",
+        "INSERT INTO t SELECT generate_series(100,200), 'payload'",
+    );
+    let rows2 = node2.safe_psql("postgres", "SELECT sum(key) FROM t");
+    let sum2: i64 = rows2.first().unwrap().get(0);
+    println!("sum = {}", sum2);
+    assert_eq!(sum2, 15150);
+}
+
+#[test]
+#[ignore]
+// Start pageserver using s3 base image
+//
+// Requires working minio with hardcoded setup:
+// .env("S3_ENDPOINT", "https://127.0.0.1:9000")
+// .env("S3_REGION", "us-east-1")
+// .env("S3_ACCESSKEY", "minioadmin")
+// .env("S3_SECRET", "minioadmin")
+// .env("S3_BUCKET", "zenith-testbucket")
+// TODO use env variables in test
+fn test_pageserver_recovery() {
+    //TODO this is a hardcoded sysid for one particular cluster. Get it somewhere
+    const SYSID: u64 = 6947041219207877724;
+    // Number of pg_class rows the restored node is expected to report.
+    const EXPECTED_PG_CLASS_ROWS: usize = 395;
+
+    //This test expects that image is already uploaded to s3
+    //To upload it use zenith_push before test (see node.push_to_s3() for details)
+    let storage_cplane = StorageControlPlane::one_page_server(true);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
+
+    //Wait while daemon uploads pages from s3
+    sleep(Duration::from_secs(15));
+
+    let node_restored = compute_cplane.new_node_wo_data();
+    node_restored.setup_compute_node(SYSID, &storage_cplane);
+    node_restored.start(&storage_cplane);
+
+    let pg_class_rows = node_restored.safe_psql("postgres", "SELECT relname from pg_class;");
+
+    assert_eq!(pg_class_rows.len(), EXPECTED_PG_CLASS_ROWS);
+}
+
+#[test]
+#[ignore]
+//Scenario for future test. Not implemented yet
+//
+// Fills a table on a regular node, pushes the node's files to s3, stops it,
+// then boots a second node without data files and checks that the table is
+// readable there.
+fn test_pageserver_node_switch() {
+    //Create pageserver
+    let storage_cplane = StorageControlPlane::one_page_server(false);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
+
+    //Create regular node
+    let node = compute_cplane.new_node();
+    node.start(&storage_cplane);
+
+    node.safe_psql(
+        "postgres",
+        "CREATE TABLE t(key int primary key, value text)",
+    );
+    node.safe_psql(
+        "postgres",
+        "INSERT INTO t SELECT generate_series(1,100), 'payload'",
+    );
+    let count: i64 = node
+        .safe_psql("postgres", "SELECT sum(key) FROM t")
+        .first()
+        .unwrap()
+        .get(0);
+    println!("sum = {}", count);
+    assert_eq!(count, 5050);
+
+    //Push all node files to s3
+    //TODO upload them directly to pageserver
+    node.push_to_s3();
+    //Upload data from s3 to pageserver
+    //storage_cplane.upload_from_s3() //Not implemented yet
+
+    //Shut down the node
+    node.stop();
+
+    //Create new node without files
+    let node_restored = compute_cplane.new_node_wo_data();
+
+    // Setup minimal set of files needed to start node and setup pageserver connection
+    // TODO 6947041219207877724 is a hardcoded sysid. Get it from node
+    node_restored.setup_compute_node(6947041219207877724, &storage_cplane);
+
+    //Start compute node without files
+    node_restored.start(&storage_cplane);
+
+    //Ensure that it has the table created on the initial node
+    let rows = node_restored.safe_psql("postgres", "SELECT key from t;");
+    // generate_series(1,100) inserted 100 rows; 5050 is the sum of the keys,
+    // not the row count.
+    assert_eq!(rows.len(), 100);
+}
--- a/integration_tests/tests/test_wal_acceptor.rs
+++ b/integration_tests/tests/test_wal_acceptor.rs
@@ -1,72 +1,28 @@
+// Restart acceptors one by one while compute is under the load.
+#[allow(dead_code)]
+mod control_plane;
+use control_plane::ComputeControlPlane;
+use control_plane::StorageControlPlane;
+
 use rand::Rng;
 use std::sync::Arc;
 use std::time::SystemTime;
 use std::{thread, time};

-use control_plane::compute::{ComputeControlPlane, PostgresNode};
-
-use integration_tests;
-use integration_tests::PostgresNodeExt;
-use integration_tests::TestStorageControlPlane;
-
-const DOWNTIME: u64 = 2;
-
-fn start_node_with_wal_proposer(
-    timeline: &str,
-    compute_cplane: &mut ComputeControlPlane,
-    wal_acceptors: &String,
-) -> Arc<PostgresNode> {
-    let node = compute_cplane.new_test_master_node(timeline);
-    let _node = node.append_conf(
-        "postgresql.conf",
-        &format!("wal_acceptors='{}'\n", wal_acceptors),
-    );
-    node.start().unwrap();
-    node
-}
-
-#[test]
-fn test_embedded_wal_proposer() {
-    let local_env = integration_tests::create_test_env("test_embedded_wal_proposer");
-
-    const REDUNDANCY: usize = 3;
-    let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
-    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
-
-    // start postgres
-    let node = start_node_with_wal_proposer("main", &mut compute_cplane, &wal_acceptors);
-
-    // check basic work with table
-    node.safe_psql(
-        "postgres",
-        "CREATE TABLE t(key int primary key, value text)",
-    );
-    node.safe_psql(
-        "postgres",
-        "INSERT INTO t SELECT generate_series(1,100000), 'payload'",
-    );
-    let count: i64 = node
-        .safe_psql("postgres", "SELECT sum(key) FROM t")
-        .first()
-        .unwrap()
-        .get(0);
-    println!("sum = {}", count);
-    assert_eq!(count, 5000050000);
-    // check wal files equality
-}
-
 #[test]
 fn test_acceptors_normal_work() {
-    let local_env = integration_tests::create_test_env("test_acceptors_normal_work");
-
+    // Start pageserver that reads WAL directly from that postgres
    const REDUNDANCY: usize = 3;
-    let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
+    let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

-    // start postgres
-    let node = start_node_with_wal_proposer("main", &mut compute_cplane, &wal_acceptors);
+    // start postgres
+    let node = compute_cplane.new_master_node();
+    node.start(&storage_cplane);
+
+    // start proxy
+    let _proxy = node.start_proxy(wal_acceptors);

    // check basic work with table
    node.safe_psql(
@@ -87,83 +43,24 @@ fn test_acceptors_normal_work() {
    // check wal files equality
 }

-// Run page server and multiple safekeepers, and multiple compute nodes running
-// against different timelines.
-#[test]
-fn test_many_timelines() {
-    // Initialize a new repository, and set up WAL safekeepers and page server.
-    const REDUNDANCY: usize = 3;
-    const N_TIMELINES: usize = 5;
-    let local_env = integration_tests::create_test_env("test_many_timelines");
-    let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
-    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
-
-    // Create branches
-    let mut timelines: Vec<String> = Vec::new();
-    timelines.push("main".to_string());
-
-    for i in 1..N_TIMELINES {
-        let branchname = format!("experimental{}", i);
-        storage_cplane
-            .pageserver
-            .branch_create(&branchname, "main")
-            .unwrap();
-        timelines.push(branchname);
-    }
-
-    // start postgres on each timeline
-    let mut nodes = Vec::new();
-    for tli_name in timelines {
-        let node = start_node_with_wal_proposer(&tli_name, &mut compute_cplane, &wal_acceptors);
-        nodes.push(node.clone());
-    }
-
-    // create schema
-    for node in &nodes {
-        node.safe_psql(
-            "postgres",
-            "CREATE TABLE t(key int primary key, value text)",
-        );
-    }
-
-    // Populate data
-    for node in &nodes {
-        node.safe_psql(
-            "postgres",
-            "INSERT INTO t SELECT generate_series(1,100000), 'payload'",
-        );
-    }
-
-    // Check data
-    for node in &nodes {
-        let count: i64 = node
-            .safe_psql("postgres", "SELECT sum(key) FROM t")
-            .first()
-            .unwrap()
-            .get(0);
-        println!("sum = {}", count);
-        assert_eq!(count, 5000050000);
-    }
-}
-
 // Majority is always alive
 #[test]
 fn test_acceptors_restarts() {
-    let local_env = integration_tests::create_test_env("test_acceptors_restarts");
-
    // Start pageserver that reads WAL directly from that postgres
    const REDUNDANCY: usize = 3;
    const FAULT_PROBABILITY: f32 = 0.01;

-    let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
+    let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
    let mut rng = rand::thread_rng();

-    // start postgres
-    let node = start_node_with_wal_proposer("main", &mut compute_cplane, &wal_acceptors);
+    // start postgres
+    let node = compute_cplane.new_master_node();
+    node.start(&storage_cplane);

+    // start proxy
+    let _proxy = node.start_proxy(wal_acceptors);
    let mut failed_node: Option<usize> = None;

    // check basic work with table
@@ -183,7 +80,7 @@ fn test_acceptors_restarts() {
            } else {
                let node: usize = rng.gen_range(0..REDUNDANCY);
                failed_node = Some(node);
-                storage_cplane.wal_acceptors[node].stop().unwrap();
+                storage_cplane.wal_acceptors[node].stop();
            }
        }
    }
@@ -196,10 +93,10 @@ fn test_acceptors_restarts() {
    assert_eq!(count, 500500);
 }

-fn start_acceptor(cplane: &Arc<TestStorageControlPlane>, no: usize) {
+fn start_acceptor(cplane: &Arc<StorageControlPlane>, no: usize) {
    let cp = cplane.clone();
    thread::spawn(move || {
-        thread::sleep(time::Duration::from_secs(DOWNTIME));
+        thread::sleep(time::Duration::from_secs(1));
        cp.wal_acceptors[no].start();
    });
 }
@@ -208,18 +105,20 @@ fn start_acceptor(cplane: &Arc<TestStorageControlPlane>, no: usize) {
 // them again and check that nothing was losed. Repeat.
 // N_CRASHES env var
 #[test]
-fn test_acceptors_unavailability() {
-    let local_env = integration_tests::create_test_env("test_acceptors_unavailability");
-
+fn test_acceptors_unavalability() {
    // Start pageserver that reads WAL directly from that postgres
    const REDUNDANCY: usize = 2;

-    let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
+    let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

-    // start postgres
-    let node = start_node_with_wal_proposer("main", &mut compute_cplane, &wal_acceptors);
+    // start postgres
+    let node = compute_cplane.new_master_node();
+    node.start(&storage_cplane);
+
+    // start proxy
+    let _proxy = node.start_proxy(wal_acceptors);

    // check basic work with table
    node.safe_psql(
@@ -230,26 +129,21 @@ fn test_acceptors_unavailability() {
    psql.execute("INSERT INTO t values (1, 'payload')", &[])
        .unwrap();

-    // Shut down all wal acceptors
-    storage_cplane.wal_acceptors[0].stop().unwrap();
+    storage_cplane.wal_acceptors[0].stop();
    let cp = Arc::new(storage_cplane);
    start_acceptor(&cp, 0);
    let now = SystemTime::now();
    psql.execute("INSERT INTO t values (2, 'payload')", &[])
        .unwrap();
-    // Here we check that the query above was hanging
-    // while wal_acceptor was unavailiable
-    assert!(now.elapsed().unwrap().as_secs() >= DOWNTIME);
+    assert!(now.elapsed().unwrap().as_secs() > 1);
    psql.execute("INSERT INTO t values (3, 'payload')", &[])
        .unwrap();

-    cp.wal_acceptors[1].stop().unwrap();
+    cp.wal_acceptors[1].stop();
    start_acceptor(&cp, 1);
    psql.execute("INSERT INTO t values (4, 'payload')", &[])
        .unwrap();
-    // Here we check that the query above was hanging
-    // while wal_acceptor was unavailiable
-    assert!(now.elapsed().unwrap().as_secs() >= 2 * DOWNTIME);
+    assert!(now.elapsed().unwrap().as_secs() > 2);

    psql.execute("INSERT INTO t values (5, 'payload')", &[])
        .unwrap();
@@ -260,21 +154,19 @@ fn test_acceptors_unavailability() {
        .unwrap()
        .get(0);
    println!("sum = {}", count);
-    // Ensure that all inserts succeeded.
-    // Including ones that were waiting for wal acceptor restart.
    assert_eq!(count, 15);
 }

-fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
+fn simulate_failures(cplane: &Arc<StorageControlPlane>) {
    let mut rng = rand::thread_rng();
    let n_acceptors = cplane.wal_acceptors.len();
    let failure_period = time::Duration::from_secs(1);
-    while cplane.is_running() {
+    loop {
        thread::sleep(failure_period);
        let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
        for i in 0..n_acceptors {
            if (mask & (1 << i)) != 0 {
-                cplane.wal_acceptors[i].stop().unwrap();
+                cplane.wal_acceptors[i].stop();
            }
        }
        thread::sleep(failure_period);
@@ -289,29 +181,29 @@ fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
 // Race condition test
 #[test]
 fn test_race_conditions() {
-    let local_env = integration_tests::create_test_env("test_race_conditions");
-
    // Start pageserver that reads WAL directly from that postgres
    const REDUNDANCY: usize = 3;

-    let storage_cplane = Arc::new(TestStorageControlPlane::fault_tolerant(
-        &local_env, REDUNDANCY,
-    ));
-    let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
+    let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
+    let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
    let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

-    // start postgres
-    let node = start_node_with_wal_proposer("main", &mut compute_cplane, &wal_acceptors);
+    // start postgres
+    let node = compute_cplane.new_master_node();
+    node.start(&storage_cplane);
+
+    // start proxy
+    let _proxy = node.start_proxy(wal_acceptors);

    // check basic work with table
    node.safe_psql(
        "postgres",
        "CREATE TABLE t(key int primary key, value text)",
    );
-
-    let cp = storage_cplane.clone();
-    let failures_thread = thread::spawn(move || {
-        simulate_failures(cp);
+    let cplane = Arc::new(storage_cplane);
+    let cp = cplane.clone();
+    thread::spawn(move || {
+        simulate_failures(&cp);
    });

    let mut psql = node.open_psql("postgres");
@@ -326,7 +218,5 @@ fn test_race_conditions() {
        .get(0);
    println!("sum = {}", count);
    assert_eq!(count, 500500);
-
-    storage_cplane.stop();
-    failures_thread.join().unwrap();
+    cplane.stop();
 }
--- a/mgmt-console/.gitignore
+++ b/mgmt-console/.gitignore
@@ -0,0 +1,23 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.js
+
+# testing
+/coverage
+
+# production
+/build
+
+# misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
--- a/mgmt-console/README
+++ b/mgmt-console/README
@@ -0,0 +1,55 @@
+Mock implementation of a management console.
+
+See demo-howto.txt for usage.
+
+Building and Installation
+-------------------------
+
+To compile Postgres:
+  sudo apt build-dep postgresql
+  sudo apt install bison flex libz-dev libssl-dev
+  sudo apt install ccache
+  sudo apt install libcurl4-openssl-dev libxml2-dev
+
+For the webapp:
+  # NOTE: This requires at least version 1.1.0 of python3-flask. That's not
+  # available in Debian Buster, need at least Bullseye.
+
+  sudo apt install python3 python3-flask python3-pip npm webpack
+  pip3 install Flask-BasicAuth
+  pip3 install boto3
+
+Clone, compile and install the patched version of Postgres:
+
+  git clone https://github.com/libzenith/postgres.git
+  cd postgres
+  git checkout zenith-experiments
+  ./configure --enable-debug --enable-cassert --with-openssl --prefix=/home/heikki/pgsql-install --with-libxml CC="ccache gcc" CFLAGS="-O0"
+  make -j4 -s install
+
+Get the webapp:
+  cd ~
+  git clone https://github.com/libzenith/zenith-mgmt-console.git
+  cd zenith-mgmt-console
+  mkdir pgdatadirs
+
+Generate a self-signed certificate and key (server.crt / server.key):
+  openssl req -new -x509 -days 365 -nodes -text -out server.crt \
+    -keyout server.key -subj "/CN=zenith-demo"
+
+For Mock S3 server (unless you want to test against a real cloud service):
+  sudo apt install python3-tornado
+
+  cd ~/zenith-mgmt-console
+  git clone https://github.com/hlinnaka/ms3.git
+
+Compile & run it:
+  npm install
+  webpack # compile React app
+
+  BASIC_AUTH_PASSWORD=<password> ./launch-local.sh
+
+
+You can view the contents of the S3 bucket with browser:
+
+http://<server>/list_bucket
--- a/mgmt-console/app.py
+++ b/mgmt-console/app.py
@@ -0,0 +1,340 @@
+from flask import request
+from flask_basicauth import BasicAuth
+from flask import render_template
+from subprocess import PIPE, STDOUT, run, Popen
+import html
+import os
+import re
+import shutil
+import logging
+import time
+
+import boto3
+from boto3.session import Session
+from botocore.client import Config
+from botocore.handlers import set_list_objects_encoding_type_url
+
+from flask import Flask
+
+import waldump
+
+
+app = Flask(__name__)
+
+app.config['BASIC_AUTH_USERNAME'] = 'zenith'
+app.config['BASIC_AUTH_PASSWORD'] = os.getenv('BASIC_AUTH_PASSWORD')
+app.config['BASIC_AUTH_FORCE'] = True
+
+basic_auth = BasicAuth(app)
+
+# S3 configuration:
+
+ENDPOINT = os.getenv('S3_ENDPOINT', 'https://localhost:9000')
+ACCESS_KEY = os.getenv('S3_ACCESSKEY', 'minioadmin')
+SECRET = os.getenv('S3_SECRET', '')
+BUCKET = os.getenv('S3_BUCKET', 'foobucket')
+
+print("Using bucket at " + ENDPOINT);
+
+#boto3.set_stream_logger('botocore', logging.DEBUG)
+
+session = Session(aws_access_key_id=ACCESS_KEY,
+                  aws_secret_access_key=SECRET,
+                  region_name=os.getenv('S3_REGION', 'auto'))
+
+# needed for google cloud?
+session.events.unregister('before-parameter-build.s3.ListObjects',
+                          set_list_objects_encoding_type_url)
+
+s3resource = session.resource('s3',
+                              endpoint_url=ENDPOINT,
+                              verify=False,
+                              config=Config(signature_version='s3v4'))
+s3bucket = s3resource.Bucket(BUCKET)
+
+s3_client = boto3.client('s3',
+                         endpoint_url=ENDPOINT,
+                         verify=False,
+                         config=Config(signature_version='s3v4'),
+                         aws_access_key_id=ACCESS_KEY,
+                         aws_secret_access_key=SECRET)
+
+
+@app.route("/")
+def index():
+    return render_template("index.html")
+
+
+@app.route("/api/waldump")
+def render_waldump():
+    return render_template("waldump.html")
+
+@app.route('/api/fetch_wal')
+def fetch_wal():
+    return waldump.fetch_wal(request, s3bucket);
+
+@app.route("/api/server_status")
+def server_status():
+    dirs = os.listdir("pgdatadirs")
+    dirs.sort()
+
+    primary = None
+    standbys = []
+
+    for dirname in dirs:
+        
+        result = run("pg_ctl status -D pgdatadirs/" + dirname, stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
+
+        srv = {
+            'datadir': dirname,
+            'status': result.stdout,
+            'port': None
+        }
+
+        if dirname == 'primary':
+            primary = srv;
+            primary['port'] = 5432;
+        else:
+            standby_match = re.search('standby_([0-9]+)', dirname)
+            if standby_match:
+                srv['port'] = int(standby_match.group(1))
+
+            standbys.append(srv);
+
+    return {'primary': primary, 'standbys': standbys}
+
+@app.route('/api/list_bucket')
+def list_bucket():
+
+    response = 'cloud bucket contents:<br>\n'
+
+    for file in s3bucket.objects.all():
+        response = response + html.escape(file.key) + '<br>\n'
+
+    return response
+
+def walpos_str(walpos):
+    return '{:X}/{:X}'.format(walpos >> 32, walpos & 0xFFFFFFFF)
+
+@app.route('/api/bucket_summary')
+def bucket_summary():
+
+    nonrelimages = []
+    minwal = int(0)
+    maxwal = int(0)
+    minseqwal = int(0)
+    maxseqwal = int(0)
+
+    for file in s3bucket.objects.all():
+        path = file.key
+        match = re.search('nonreldata/nonrel_([0-9A-F]+).tar', path)
+        if match:
+            walpos = int(match.group(1), 16)
+            nonrelimages.append(walpos_str(walpos))
+
+        match = re.search('nonreldata/nonrel_([0-9A-F]+)-([0-9A-F]+)', path)
+        if match:
+            endwal = int(match.group(2), 16)
+            if endwal > maxwal:
+                maxwal = endwal
+
+        match = re.search('walarchive/([0-9A-F]{8})([0-9A-F]{8})([0-9A-F]{8})', path)
+        if match:
+            tli = int(match.group(1), 16)
+            logno = int(match.group(2), 16)
+            segno = int(match.group(3), 16)
+            # FIXME: this assumes default 16 MB wal segment size
+            logsegno = logno * (0x100000000 / (16*1024*1024)) + segno
+
+            seqwal = int((logsegno + 1) * (16*1024*1024))
+
+            if seqwal > maxseqwal:
+                maxseqwal = seqwal;
+            if minseqwal == 0 or seqwal < minseqwal:
+                minseqwal = seqwal;
+
+    return {
+        'nonrelimages': nonrelimages,
+        'minwal': walpos_str(minwal),
+        'maxwal': walpos_str(maxwal),
+        'minseqwal': walpos_str(minseqwal),
+        'maxseqwal': walpos_str(maxseqwal)
+        }
+
+def print_cmd_result(cmd_result):
+    return print_cmd_result_ex(cmd_result.args, cmd_result.returncode, cmd_result.stdout)
+
+def print_cmd_result_ex(cmd, returncode, stdout):
+    res = ''
+    res += 'ran command:\n' + str(cmd) + '\n'
+    res += 'It returned code ' + str(returncode) + '\n'
+    res += '\n'
+    res += 'stdout/stderr:\n'
+    res += stdout
+
+    return res
+
+@app.route('/api/init_primary', methods=['GET', 'POST'])
+def init_primary():
+    
+    initdb_result = run("initdb -D pgdatadirs/primary --username=zenith --pwfile=pg-password.txt", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
+    if initdb_result.returncode != 0:
+        return print_cmd_result(initdb_result)
+    
+    # Append archive_mode and archive_command and port to postgresql.conf
+    f=open("pgdatadirs/primary/postgresql.conf", "a+")
+    f.write("listen_addresses='*'\n")
+    f.write("archive_mode=on\n")
+    f.write("archive_command='zenith_push --archive-wal-path=%p --archive-wal-fname=%f'\n")
+    f.write("ssl=on\n")
+    f.close()
+
+    f=open("pgdatadirs/primary/pg_hba.conf", "a+")
+    f.write("# allow SSL connections with password from anywhere\n")
+    f.write("hostssl    all             all             0.0.0.0/0           md5\n")
+    f.write("hostssl    all             all             ::0/0               md5\n")
+    f.close()
+
+    shutil.copyfile("server.crt", "pgdatadirs/primary/server.crt")
+    shutil.copyfile("server.key", "pgdatadirs/primary/server.key")
+    os.chmod("pgdatadirs/primary/server.key", 0o0600)
+    
+    start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+    start_rc = start_proc.wait()
+    start_stdout, start_stderr = start_proc.communicate()
+
+    responsestr = print_cmd_result(initdb_result) + '\n'
+    responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
+
+    return responsestr
+
+@app.route('/api/zenith_push', methods=['GET', 'POST'])
+def zenith_push():
+    # Stop the primary if it's running
+    stop_result = run(args=["pg_ctl", "stop", "-D", "pgdatadirs/primary"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+    
+    # Call zenith_push
+    push_result = run("zenith_push -D pgdatadirs/primary", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
+
+    # Restart the primary
+    start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+    start_rc = start_proc.wait()
+    start_stdout, start_stderr = start_proc.communicate()
+    
+    responsestr = print_cmd_result(stop_result) + '\n'
+    responsestr += print_cmd_result(push_result) + '\n'
+    responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout) + '\n'
+
+    return responsestr
+
+@app.route('/api/create_standby', methods=['GET', 'POST'])
+def create_standby():
+
+    walpos = request.form.get('walpos')
+    if not walpos:
+        return 'no walpos'
+    
+    dirs = os.listdir("pgdatadirs")
+
+    last_port = 5432
+
+    for dirname in dirs:
+
+        standby_match = re.search('standby_([0-9]+)', dirname)
+        if standby_match:
+            port = int(standby_match.group(1))
+            if port > last_port:
+                last_port = port
+
+    standby_port = last_port + 1
+
+    standby_dir = "pgdatadirs/standby_" + str(standby_port)
+
+    # Call zenith_restore
+    restore_result = run(["zenith_restore", "--end=" + walpos, "-D", standby_dir], stdout=PIPE, stderr=STDOUT, encoding='latin1')
+    responsestr = print_cmd_result(restore_result)
+
+    if restore_result.returncode == 0:
+        # Append hot_standby and port to postgresql.conf
+        f=open(standby_dir + "/postgresql.conf", "a+")
+        f.write("hot_standby=on\n")
+        f.write("port=" + str(standby_port) + "\n")
+        f.close()
+
+        start_proc = Popen(args=["pg_ctl", "start", "-D", standby_dir, "-l", standby_dir + "/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+        start_rc = start_proc.wait()
+        start_stdout, start_stderr = start_proc.communicate()
+        responsestr += '\n\n' + print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
+
+    return responsestr
+
+@app.route('/api/destroy_server', methods=['GET', 'POST'])
+def destroy_primary():
+
+    datadir = request.form.get('datadir')
+
+    # Check that the datadir parameter doesn't contain anything funny.
+    if not re.match("^[A-Za-z0-9_-]+$", datadir):
+        raise Exception('invalid datadir: ' + datadir)
+    
+    # Stop the server if it's running
+    stop_result = run(args=["pg_ctl", "stop", "-m", "immediate", "-D", "pgdatadirs/" + datadir], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+
+    shutil.rmtree('pgdatadirs/' + datadir, ignore_errors=True)
+
+    responsestr = print_cmd_result(stop_result) + '\n'
+    responsestr += 'Deleted datadir ' + datadir + '.\n'
+
+    return responsestr
+
+@app.route('/api/restore_primary', methods=['GET', 'POST'])
+def restore_primary():
+
+    # Call zenith_restore
+    restore_result = run(["zenith_restore", "-D", "pgdatadirs/primary"], stdout=PIPE, stderr=STDOUT, encoding='latin1')
+    responsestr = print_cmd_result(restore_result)
+
+    # Append restore_command to postgresql.conf, so that it can find the last raw WAL segments
+    f=open("pgdatadirs/primary/postgresql.conf", "a+")
+    f.write("listen_addresses='*'\n")
+    f.write("restore_command='zenith_restore --archive-wal-path=%p --archive-wal-fname=%f'\n")
+    f.write("ssl=on\n")
+    f.close()
+    
+    if restore_result.returncode == 0:
+        start_proc = Popen(args=["pg_ctl", "start", "-D", "pgdatadirs/primary", "-l", "pgdatadirs/primary/log"], stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=False, start_new_session=True, close_fds=True)
+        start_rc = start_proc.wait()
+        start_stdout, start_stderr = start_proc.communicate()
+        responsestr += print_cmd_result_ex(start_proc.args, start_rc, start_stdout)
+
+    return responsestr
+
+@app.route('/api/slicedice', methods=['GET', 'POST'])
+def run_slicedice():
+    result = run("zenith_slicedice", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
+    
+    responsestr = print_cmd_result(result)
+
+    return responsestr
+
+@app.route('/api/reset_demo', methods=['POST'])
+def reset_all():
+    result = run("pkill -9 postgres", stdout=PIPE, stderr=STDOUT, universal_newlines=True, shell=True)
+
+    dirs = os.listdir("pgdatadirs")
+    for dirname in dirs:
+        shutil.rmtree('pgdatadirs/' + dirname)
+        
+    for file in s3bucket.objects.all():
+        s3_client.delete_object(Bucket = BUCKET, Key = file.key)
+
+    responsestr = print_cmd_result(result) + '\n'
+    responsestr += '''
+Deleted all Postgres datadirs.
+Deleted all files in object storage bucket.
+'''
+
+    return responsestr
+
+if __name__ == '__main__':
+    app.run()
--- a/mgmt-console/babel.config.js
+++ b/mgmt-console/babel.config.js
@@ -0,0 +1,3 @@
+module.exports = {
+    presets: ["@babel/preset-env", "@babel/preset-react"],
+};
--- a/mgmt-console/demo-howto.txt
+++ b/mgmt-console/demo-howto.txt
@@ -0,0 +1,67 @@
+Mock implementation of a management console.
+
+This isn't very different from a "normal" PostgreSQL installation with
+a base backup and WAL archive. The main user-visible difference is
+that when you create a standby server, we don't restore the whole data
+directory, but only the "non-relation" files. Relation files are
+restored on demand, when they're accessed the first time. That makes
+the "create standby" operation very fast, at the cost of some delay
+when you connect and start running queries. This is most visible if you
+have a large database. (However, see note below about large databases)
+
+Note: lots of things are broken/unsafe. Things will fail if a table is
+larger than 1 GB. Or if there are more than 1000 files in the cloud
+bucket.
+
+How to use this demo:
+
+1. If there are any leftovers from previous runs, reset by clicking
+   the RESET DEMO button.  This kills and deletes all Postgres servers,
+   and empties the cloud storage bucket
+
+2. Create primary server by clicking on the "Init primary" button
+
+3. Push a base image of the primary to cloud storage, by clicking the
+   "push base image" button.  (This takes about 30 seconds, be
+   patient)
+
+4. Connect to primary with psql, and create a test table with a little data.
+
+      psql postgres  -p5432 -U zenith -h<host>
+
+      create table mytable (i int4);
+
+      insert into mytable values (1);
+      select pg_switch_wal();
+
+   The Postgres password is the same as for the management console.
+
+5. Now that there's a new WAL segment in the archive, we can "slice &
+   dice" it. Click on the "Slice & Dice WAL" button.
+
+6. Perform more updates on the primary, to generate more WAL.
+
+      insert into mytable values (2); select pg_switch_wal();
+      insert into mytable values (3); select pg_switch_wal();
+      insert into mytable values (4); select pg_switch_wal();
+      insert into mytable values (5); select pg_switch_wal();
+
+7. Slice & Dice the WAL again
+
+8. Now you can create read-only standby servers at any point in the
+   WAL. Type a WAL position in the text box (or use the slider), and
+   click "Create new standby". The first standby is created at port 5433,
+   the second at port 5434, and so forth.
+
+9. Connect to the standby with "psql -p 5433". Note that it takes a
+   few seconds until the connection is established. That's because the
+   standby has to restore the basic system catalogs, like pg_database and
+   pg_authid from the backup. After connecting, you can do "\d" to list
+   tables, this will also take a few seconds, as more catalog tables are
+   restored from backup.  Subsequent commands will be faster.
+
+   Run queries in the standby:
+
+      select * from mytable;
+
+   the result depends on the LSN that you picked when you created the server.
--- a/mgmt-console/js/app.js
+++ b/mgmt-console/js/app.js
@@ -0,0 +1,463 @@
+import React, { useState, useEffect } from 'react';
+import ReactDOM from 'react-dom';
+import Loader from "react-loader-spinner";
+import { Router, Route, Link, IndexRoute, hashHistory, browserHistory } from 'react-router';
+
+function ServerStatus(props) {
+    const datadir = props.server.datadir;
+    const status = props.server.status;
+    const port = props.server.port;
+
+    return (
+	<div>
+	    <h2>{ datadir == 'primary' ? 'Primary' : datadir }</h2>
+	    status: <div className='status'>{status}</div><br/>
+	    to connect: <span className='shellcommand'>psql -h { window.location.hostname } -p { port } -U zenith postgres</span><br/>
+	</div>
+    );
+}
+
+function StandbyList(props) {
+    const bucketSummary = props.bucketSummary;
+    const standbys = props.standbys;
+    const maxwalpos = bucketSummary.maxwal ? walpos_to_int(bucketSummary.maxwal) : 0;
+
+    const [walposInput, setWalposInput] = useState({ src: 'text', value: '0/0'});
+
+    // find earliest base image
+    const minwalpos = bucketSummary.nonrelimages ? bucketSummary.nonrelimages.reduce((minpos, imgpos_str, index, array) => {
+	const imgpos = walpos_to_int(imgpos_str);
+	return (minpos == 0 || imgpos < minpos) ? imgpos : minpos;
+    }, 0) : 0;
+
+    const can_create_standby = minwalpos > 0 && maxwalpos > 0 && maxwalpos >= minwalpos;
+    var walpos_valid = true;
+
+    function create_standby() {
+	const formdata = new FormData();
+	formdata.append("walpos", walposStr);
+
+	props.startOperation('Creating new standby at ' + walposStr + '...',
+			     fetch("/api/create_standby", { method: 'POST', body: formdata }));
+    }
+
+    function destroy_standby(datadir) {
+	const formdata = new FormData();
+	formdata.append("datadir", datadir);
+	props.startOperation('Destroying ' + datadir + '...',
+			     fetch("/api/destroy_server", { method: 'POST', body: formdata }));
+    }
+
+    const handleSliderChange = (event) => {
+	setWalposInput({ src: 'slider', value: event.target.value });
+    }    
+
+    const handleWalposChange = (event) => {
+	setWalposInput({ src: 'text', value: event.target.value });
+    }
+
+    var sliderValue;
+    var walposStr;
+    if (walposInput.src == 'text')
+    {
+	const walpos = walpos_to_int(walposInput.value);
+
+	if (walpos >= minwalpos && walpos <= maxwalpos)
+	    walpos_valid = true;
+	else
+	    walpos_valid = false;
+	
+	sliderValue = Math.round((walpos - minwalpos) / (maxwalpos - minwalpos) * 100);
+	walposStr = walposInput.value;
+    }
+    else
+    {
+	const slider = walposInput.value;
+	const new_walpos = minwalpos + slider / 100 * (maxwalpos - minwalpos);
+
+	console.log('minwalpos: '+ minwalpos);
+	console.log('maxwalpos: '+ maxwalpos);
+
+	walposStr = int_to_walpos(Math.round(new_walpos));
+	walpos_valid = true;
+	console.log(walposStr);
+    }
+
+    var standbystatus = ''
+    if (standbys)
+    {
+	standbystatus = 
+	    <div>
+		{
+		    standbys.length > 0 ? 
+ 			standbys.map((server) =>
+			    <>
+				<ServerStatus key={ 'status_' + server.datadir} server={server}/>
+				<button key={ 'destroy_' + server.datadir} onClick={e => destroy_standby(server.datadir)}>Destroy standby</button>
+			    </>
+			) : "no standby servers"
+		}
+	    </div>
+    }
+
+    return (
+	<div>
+	    <h2>Standbys</h2>
+	    <button onClick={create_standby} disabled={!can_create_standby || !walpos_valid}>Create new Standby</button> at LSN 
+            <input type="text" id="walpos_input" value={ walposStr } onChange={handleWalposChange} disabled={!can_create_standby}/>
+	    <input type="range" id="walpos_slider" min="0" max="100" steps="1" value={sliderValue}  onChange={handleSliderChange} disabled={!can_create_standby}/>
+	    <br/>
+	    { standbystatus }
+	</div>
+    );
+}
+
+function ServerList(props) {
+    const primary = props.serverStatus ? props.serverStatus.primary : null;
+    const standbys = props.serverStatus ? props.serverStatus.standbys : [];
+    const bucketSummary = props.bucketSummary;
+
+    var primarystatus = '';
+
+    function destroy_primary() {
+	const formdata = new FormData();
+	formdata.append("datadir", 'primary');
+	props.startOperation('Destroying primary...',
+			     fetch("/api/destroy_server", { method: 'POST', body: formdata }));
+    }    
+
+    function restore_primary() {
+	props.startOperation('Restoring primary...',
+			     fetch("/api/restore_primary", { method: 'POST' }));
+    }    
+    
+    if (primary)
+    {
+	primarystatus =
+	    <div>
+		<ServerStatus server={primary}/>
+		<button onClick={destroy_primary}>Destroy primary</button>
+	    </div>
+    }
+    else
+    {
+	primarystatus =
+	    <div>
+		no primary server<br/>
+		<button onClick={restore_primary}>Restore primary</button>
+	    </div>
+    }
+
+    return (
+	<>
+	    { primarystatus }
+	    <StandbyList standbys={standbys} startOperation={props.startOperation} bucketSummary={props.bucketSummary}/>
+	    <p className="todo">
+		Should we list the WAL safekeeper nodes here? Or are they part of the Storage? Or not visible to users at all?
+	    </p>
+	</>
+    );
+}
+
+function BucketSummary(props) {
+    const bucketSummary = props.bucketSummary;
+    const startOperation = props.startOperation;
+
+    function slicedice() {
+	startOperation('Slicing sequential WAL to per-relation WAL...',
+		       fetch("/api/slicedice", { method: 'POST' }));
+    }
+    
+    if (!bucketSummary.nonrelimages)
+    {
+	return <>loading...</>
+    }
+
+    return (
+	<div>
+	    <div>Base images at following WAL positions:
+		<ul>
+		    {bucketSummary.nonrelimages.map((img) => (
+			<li key={img}>{img}</li>
+		    ))}
+		</ul>
+	    </div>
+            Sliced WAL is available up to { bucketSummary.maxwal }<br/>
+	    Raw WAL is available up to { bucketSummary.maxseqwal }<br/>
+
+	    <br/>
+	    <button onClick={slicedice}>Slice & Dice WAL</button>
+	    <p className="todo">
+		Currently, the slicing or "sharding" of the WAL needs to be triggered manually, by clicking the above button.
+		<br/>
+		TODO: make it a continuous process that runs in the WAL safekeepers, or in the Page Servers, or as a standalone service.
+	    </p>
+	</div>
+    );
+}
+
+function ProgressIndicator()
+{
+    return (
+	<div>
+	    <Loader
+		type="Puff"
+		color="#00BFFF"
+		height={100}
+		width={100}
+	    />
+	</div>
+    )
+}
+
+function walpos_to_int(walpos)
+{
+    const [hi, lo] = walpos.split('/');
+
+    return parseInt(hi, 16) + parseInt(lo, 16);
+}
+
+function int_to_walpos(x)
+{
+    console.log('converting ' + x);
+    return (Math.floor((x / 0x100000000)).toString(16) + '/' + (x % 0x100000000).toString(16)).toUpperCase();
+}
+
+function OperationStatus(props) {
+    const lastOperation = props.lastOperation;
+    const inProgress = props.inProgress;
+    const operationResult = props.operationResult;
+
+    if (lastOperation)
+    {
+	return (
+	    <div><h2>Last operation:</h2>
+		<div>{lastOperation} { (!inProgress && lastOperation) ? 'done!' : '' }</div>
+		<div className='result'>
+		    {inProgress ? <ProgressIndicator/> : <pre>{operationResult}</pre>}
+		</div>
+	    </div>
+	);
+    }
+    else
+	return '';
+}
+
+function ActionButtons(props) {
+
+    const startOperation = props.startOperation;
+    const bucketSummary = props.bucketSummary;
+    
+    function reset_demo() {
+	startOperation('resetting everything...',
+		       fetch("/api/reset_demo", { method: 'POST' }));
+    }
+
+    function init_primary() {
+	startOperation('Initializing new primary...',
+		       fetch("/api/init_primary", { method: 'POST' }));
+    }
+
+    function zenith_push() {
+	startOperation('Pushing new base image...',
+		       fetch("/api/zenith_push", { method: 'POST' }));
+    }
+	
+    return (
+	<div>
+	    <p className="todo">
+		RESET DEMO deletes everything in the storage bucket, and stops and destroys all servers. This resets the whole demo environment to the initial state.
+	    </p>
+	    <button onClick={reset_demo}>RESET DEMO</button>
+	    <p className="todo">
+		Init Primary runs initdb to create a new primary server. Click this after Resetting the demo.
+	    </p>
+
+	    <button onClick={init_primary}>Init primary</button>
+
+	    <p className="todo">
+		Push Base Image stops the primary, copies the current state of the primary to the storage bucket as a new base backup, and restarts the primary.
+		<br/>
+		TODO: This should be handled by a continuous background process, probably running in the storage nodes. And without having to shut down the cluster, of course.
+	    </p>
+
+	    <button onClick={zenith_push}>Push base image</button>
+
+	</div>
+    );
+}
+
+function Sidenav(props)
+{
+    const toPage = (page) => (event) => {
+	//event.preventDefault()
+	props.switchPage(page);
+    };
+    return (
+	<div>
+	    <h3 className="sidenav-item">Menu</h3>
+	    <a href="#servers" onClick={toPage('servers')} className="sidenav-item">Servers</a>
+	    <a href="#storage" onClick={toPage('storage')} className="sidenav-item">Storage</a>
+	    <a href="#snapshots" onClick={toPage('snapshots')} className="sidenav-item">Snapshots</a>
+	    <a href="#demo" onClick={toPage('demo')} className="sidenav-item">Demo</a>
+	    <a href="#import" onClick={toPage('import')}  className="sidenav-item">Import / Export</a>
+	    <a href="#jobs" onClick={toPage('jobs')} className="sidenav-item">Jobs</a>
+	</div>
+    );
+}
+
+function App()
+{
+    const [page, setPage] = useState('servers');
+    const [serverStatus, setServerStatus] = useState({});
+    const [bucketSummary, setBucketSummary] = useState({});
+    const [lastOperation, setLastOperation] = useState('');
+    const [inProgress, setInProgress] = useState('');
+    const [operationResult, setOperationResult] = useState('');
+
+    useEffect(() => {
+	reloadStatus();
+    }, []);
+
+    function startOperation(operation, promise)
+    {
+	promise.then(result => result.text()).then(resultText => {
+	    operationFinished(resultText);
+	});
+	
+	setLastOperation(operation);
+	setInProgress(true);
+	setOperationResult('');
+    }
+
+    function operationFinished(result)
+    {
+	setInProgress(false);
+	setOperationResult(result);
+	reloadStatus();
+    }
+
+    function clearOperation()
+    {
+	setLastOperation('')
+	setInProgress('');
+	setOperationResult('');
+	console.log("cleared");
+    }
+    
+    function reloadStatus()
+    {
+	fetch('/api/server_status').then(res => res.json()).then(data => {
+	    setServerStatus(data);
+	});
+
+	fetch('/api/bucket_summary').then(res => res.json()).then(data => {
+	    setBucketSummary(data);
+	});
+    }
+
+    const content = () => {
+	console.log(page);
+	if (page === 'servers') {
+	    return (
+		<>
+		    <h1>Server status</h1>
+		    <ServerList startOperation={ startOperation }
+				serverStatus={ serverStatus }
+				bucketSummary={ bucketSummary }/>
+		</>
+	    );
+	} else if (page === 'storage') {
+	    return (
+		<>
+		    <h1>Storage bucket status</h1>
+		    <BucketSummary startOperation={ startOperation }
+				   bucketSummary={ bucketSummary }/>
+		</>
+	    );
+	} else if (page === 'snapshots') {
+	    return (
+		<>
+		    <h1>Snapshots</h1>
+		    <p className="todo">
+			In Zenith, snapshots are just specific points (LSNs) in the WAL history, with a label. A snapshot prevents garbage collecting old data that's still needed to reconstruct the database at that LSN.
+		    </p>
+		    <p className="todo">
+			TODO:
+			<ul>
+			    <li>List existing snapshots</li>
+			    <li>Create new snapshot manually, from current state or from a given LSN</li>
+			    <li>Drill into the WAL stream to see what have happened. Provide tools for e.g. finding point where a table was dropped</li>
+			    <li>Create snapshots automatically based on events in the WAL, like if you call pg_create_restore_point(() in the primary</li>
+			    <li>Launch new reader instance at a snapshot</li>
+			    <li>Export snapshot</li>
+			    <li>Rollback cluster to a snapshot</li>
+			</ul>
+		    </p>
+		</>
+	    );
+	} else if (page === 'demo') {
+	    return (
+		<>
+		    <h1>Misc actions</h1>
+		    <ActionButtons startOperation={ startOperation }
+				   bucketSummary={ bucketSummary }/>
+		</>
+	    );
+	} else if (page === 'import') {
+	    return (
+		<>
+		    <h1>Import & Export tools</h1>
+		    <p className="TODO">TODO:
+			<ul>
+			    <li>Initialize database from existing backup (pg_basebackup, WAL-G, pgbackrest)</li>
+			    <li>Initialize from a pg_dump or other SQL script</li>
+			    <li>Launch batch job to import data files from S3</li>
+			    <li>Launch batch job to export database with pg_dump to S3</li>
+			</ul>
+			These jobs can be run in against reader processing nodes. We can even
+			spawn a new reader node dedicated to a job, and destry it when the job is done.
+		    </p>
+		</>
+	    );
+	} else if (page === 'jobs') {
+	    return (
+		<>
+		    <h1>Batch jobs</h1>
+		    <p className="TODO">TODO:
+			<ul>
+			    <li>List running jobs launched from Import & Export tools</li>
+			    <li>List other batch jobs launched by the user</li>
+			    <li>Launch new batch jobs</li>
+			</ul>
+		    </p>
+		</>
+	    );
+	}
+    }
+
+    function switchPage(page)
+    {
+	console.log("topage " + page);
+	setPage(page)
+	clearOperation();
+    };
+
+    return (
+	<div className="row">
+	    <div className="sidenav">
+		<Sidenav switchPage={switchPage} className="column"/>
+	    </div>
+	    <div className="column">
+		<div>
+		    { content() }
+		</div>
+		<OperationStatus lastOperation={ lastOperation }
+				 inProgress = { inProgress }
+				 operationResult = { operationResult }/>
+	    </div>
+	</div>
+    );
+}
+
+ReactDOM.render(<App/>, document.getElementById('reactApp'));
--- a/mgmt-console/js/waldump.js
+++ b/mgmt-console/js/waldump.js
@@ -0,0 +1,105 @@
+import React, { useState, useEffect } from 'react';
+import ReactDOM from 'react-dom';
+import Loader from "react-loader-spinner";
+
+function walpos_to_int(walpos)
+{
+    const [hi, lo] = walpos.split('/');
+
+    return parseInt(hi, 16) + parseInt(lo, 16);
+}
+
+const palette = [
+    "#003f5c",
+    "#2f4b7c",
+    "#665191",
+    "#a05195",
+    "#d45087",
+    "#f95d6a",
+    "#ff7c43",
+    "#ffa600"];
+
+function WalRecord(props)
+{
+    const firstwalpos = props.firstwalpos;
+    const endwalpos = props.endwalpos;
+    const record = props.record;
+    const index = props.index;
+    const xidmap = props.xidmap;
+
+    const startpos = walpos_to_int(record.start)
+    const endpos = walpos_to_int(record.end)
+
+    const scale = 1000 / (16*1024*1024)
+    const startx = (startpos - firstwalpos) * scale;
+    const endx = (endpos - firstwalpos) * scale;
+
+    const xidindex = xidmap[record.xid];
+    const color = palette[index % palette.length];
+
+    const y = 5 + (xidindex) * 20 + (index % 2) * 2;
+    
+    return (
+	<line x1={ startx } y1={y} x2={endx} y2={y} stroke={ color } strokeWidth="5">
+	    <title>
+		start: { record.start } end: { record.end }
+	    </title>
+	</line>
+    )
+}
+
+function WalFile(props)
+{
+    const walContent = props.walContent;
+    const firstwalpos = props.firstwalpos;
+    const xidmap = props.xidmap;
+   
+    return <svg width="1000" height="200">
+	       {
+		   walContent.records ? 
+ 		       walContent.records.map((record, index) =>
+			   <WalRecord key={record.start} firstwalpos={firstwalpos} record={record} index={index} xidmap={xidmap}/>
+		       ) : "no records"
+	       }
+	   </svg>
+}
+
+function WalDumpApp()
+{
+    const [walContent, setWalContent] = useState({});
+
+    const filename = '00000001000000000000000C';
+
+    useEffect(() => {
+	fetch('/fetch_wal?filename='+filename).then(res => res.json()).then(data => {
+	    setWalContent(data);
+	});
+    }, []);
+
+    var firstwalpos = 0;
+    var endwalpos = 0;
+    var numxids = 0;
+    var xidmap = {};
+    if (walContent.records && walContent.records.length > 0)
+    {
+	firstwalpos = walpos_to_int(walContent.records[0].start);
+	endwalpos = firstwalpos + 16*1024*1024;
+
+	walContent.records.forEach(rec => {
+	    if (!xidmap[rec.xid])
+	    {
+		xidmap[rec.xid] = ++numxids;
+	    }
+	});
+    }
+
+    return (
+	<>
+	    <h2>{filename}</h2>
+	    <WalFile walContent={walContent} firstwalpos={firstwalpos} endwalpos={endwalpos} xidmap={xidmap}/>
+	</>
+    );
+}
+
+console.log('hey there');
+ReactDOM.render(<WalDumpApp/>, document.getElementById('waldump'));
--- a/mgmt-console/launch-google-cloud.sh
+++ b/mgmt-console/launch-google-cloud.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# NOTE: You must set the following environment variables before running this:
+#  BASIC_AUTH_PASSWORD - basic http auth password
+#  S3_ACCESSKEY
+#  S3_SECRET
+
+
+S3_ENDPOINT=https://storage.googleapis.com S3_BUCKET=zenith-testbucket PATH=/home/heikki/pgsql-install/bin:$PATH flask run --host=0.0.0.0
--- a/mgmt-console/launch-local.sh
+++ b/mgmt-console/launch-local.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# Start a local S3 server in the background, then run the Flask application
+# in development mode against it.
+#
+# NOTE: You should set the BASIC_AUTH_PASSWORD environment variable before
+# calling this script.
+
+# Launch S3 server
+(cd ms3 && python3 -m ms3.app --listen-address=localhost) &
+
+# The assignments below apply to the flask process only.
+FLASK_ENV=development \
+S3_REGION=auto \
+S3_ENDPOINT=http://localhost:9009 \
+S3_BUCKET=zenith-testbucket \
+PATH=/home/heikki/pgsql.fsmfork/bin:$PATH \
+flask run --host=0.0.0.0
--- a/mgmt-console/package-lock.json
+++ b/mgmt-console/package-lock.json
--- a/mgmt-console/package.json
+++ b/mgmt-console/package.json
@@ -0,0 +1,27 @@
+{
+  "name": "starter-kit",
+  "version": "1.1.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1",
+    "build": "webpack",
+    "start": "python app.py"
+  },
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "react": "^17.0.1",
+    "react-dom": "^17.0.1",
+    "react-loader-spinner": "^4.0.0",
+    "react-router": "^5.2.0"
+  },
+  "devDependencies": {
+    "@babel/core": "^7.13.1",
+    "@babel/preset-env": "^7.13.5",
+    "@babel/preset-react": "^7.12.13",
+    "babel-loader": "^8.2.2",
+    "webpack": "^5.24.2",
+    "webpack-cli": "^4.5.0"
+  }
+}
--- a/mgmt-console/templates/index.html
+++ b/mgmt-console/templates/index.html
@@ -0,0 +1,58 @@
+<head>
+
+<style>
+  .status {
+      font-family: monospace;
+      background-color: lightgrey;
+  }
+  .shellcommand {
+      font-family: monospace;
+      background-color: lightgrey;
+  }
+  .result {
+      font-family: monospace;
+      background-color: lightgrey;
+      padding: 10px;
+  }
+
+
+  .todo   {font-style: italic;}
+
+
+  h1   {color: blue;}
+
+  .column {
+      float: left;
+      width: 50%;
+      padding: 10px;
+  }
+  /* Clear floats after the columns */
+  .row:after {
+      content: "";
+      display: table;
+      clear: both;
+  }
+
+  .sidenav {
+      float: left;
+      width: 150px;
+      padding: 10px;
+      background-color: pink;
+  }
+
+  .sidenav-item {
+      padding:10px 0px;
+      border:none;
+      display:block;
+  }
+
+</style>
+
+</head>
+
+<body>
+  <div id="reactApp"></div>
+
+  <!-- Attach React components -->
+  <script type="text/javascript" src="{{ url_for('static', filename='app_bundle.js') }}"></script>
+</body>
--- a/mgmt-console/templates/waldump.html
+++ b/mgmt-console/templates/waldump.html
@@ -0,0 +1,46 @@
+<head>
+
+<style>
+  .status {
+      font-family: monospace;
+      background-color: lightgrey;
+  }
+  .shellcommand {
+      font-family: monospace;
+      background-color: lightgrey;
+  }
+  .result {
+      font-family: monospace;
+      background-color: lightgrey;
+      padding: 10px;
+  }
+h1   {color: blue;}
+p    {color: red;}
+
+* {
+  box-sizing: border-box;
+}
+
+.row {
+  display: flex;
+}
+
+/* Create two columns that sit next to each other (30% / 70% wide) */
+.column1 {
+  flex: 30%;
+  padding: 10px;
+}
+.column2 {
+  flex: 70%;
+  padding: 10px;
+}
+</style>
+
+</head>
+
+<body>
+  <div id="waldump"></div>
+
+  <!-- Attach React components -->
+  <script type="text/javascript" src="{{ url_for('static', filename='waldump_bundle.js') }}"></script>
+</body>
--- a/mgmt-console/waldump.py
+++ b/mgmt-console/waldump.py
@@ -0,0 +1,25 @@
+#
+# This file contains work-in-progress code to visualize WAL contents.
+#
+# This is the API endpoint that calls a 'zenith_wal_to_json' executable,
+# which is a hacked version of pg_waldump that prints information about the
+# records in JSON format. The code in js/waldump.js displays it.
+#
+
+import os
+import re
+from subprocess import PIPE, STDOUT, run, Popen
+
+def fetch_wal(request, s3bucket):
+    filename = request.args.get('filename')
+    if not re.match("^[A-Za-z0-9_]+$", filename):
+        raise Exception('invalid WAL filename: ' + filename)
+
+    # FIXME: this downloads the WAL file to current dir. Use a temp dir? Pipe?
+    s3bucket.download_file('walarchive/' + filename, filename)
+
+    result = run("zenith_wal_to_json " + filename, stdout=PIPE, universal_newlines=True, shell=True)
+
+    os.unlink(filename);
+
+    return result.stdout
--- a/mgmt-console/webpack.config.js
+++ b/mgmt-console/webpack.config.js
@@ -0,0 +1,27 @@
+var webpack = require('webpack');  
+module.exports = {  
+    entry: {
+	app: './js/app.js',
+	waldump: './js/waldump.js'
+    },
+    output: {
+	filename: "[name]_bundle.js",
+	path: __dirname + '/static'
+    },
+    module: {
+	rules: [
+	    {
+		test: /\.js?$/,
+		exclude: /node_modules/,
+		use: {
+		    loader: 'babel-loader',
+		    options: {
+			presets: ['@babel/preset-env']
+		    }
+		}
+	    }
+	]
+    },
+    plugins: [
+    ]
+};
--- a/mgmt-console/zenith.py
+++ b/mgmt-console/zenith.py
@@ -0,0 +1,179 @@
+#zenith.py
+import click
+import testgres
+import os
+
+from testgres import PostgresNode
+from tabulate import tabulate
+
+zenith_base_dir = '/home/anastasia/zenith/basedir'
+
+@click.group()
+def main():
+    """Run the Zenith CLI."""
+
+@click.group()
+def pg():
+    """Db operations
+
+        NOTE: 'database' here means one postgresql node
+    """
+
+@click.command(name='create')
+@click.option('--name', required=True)
+@click.option('-s', '--storage-name', help='Name of the storage',
+                                 default='zenith-local',
+                                 show_default=True)
+@click.option('--snapshot', help='init from the snapshot. Snap is a name or URL')
+@click.option('--no-start', is_flag=True, help='Do not start created node',
+                            default=False, show_default=True)
+def pg_create(name, storage_name, snapshot, no_start):
+    """Initialize the database"""
+    node = PostgresNode()
+    base_dir = os.path.join(zenith_base_dir, 'pg', name)
+    node = testgres.get_new_node(name, base_dir=base_dir)
+    # TODO skip init, instead of that link node with storage or upload it from snapshot
+    node.init()
+    if(no_start==False):
+        node.start()
+
+@click.command(name='start')
+@click.option('--name', required=True)
+@click.option('--snapshot')
+@click.option('--read-only', is_flag=True, help='Start read-only node', show_default=True)
+def pg_start(name, snapshot, read_only):
+    """Start the database"""
+    node = PostgresNode()
+    base_dir = os.path.join(zenith_base_dir, 'pg', name)
+    node = testgres.get_new_node(name, base_dir=base_dir)
+    # TODO pass snapshot as a parameter
+    node.start()
+
+@click.command(name='stop')
+@click.option('--name', required=True)
+def pg_stop(name):
+    """Stop the database"""
+    node = PostgresNode()
+    base_dir = os.path.join(zenith_base_dir, 'pg', name)
+    node = testgres.get_new_node(name, base_dir=base_dir)
+    node.stop()
+
+@click.command(name='destroy')
+@click.option('--name', required=True)
+def pg_destroy(name):
+    """Drop the database"""
+    node = PostgresNode()
+    base_dir = os.path.join(zenith_base_dir, 'pg', name)
+    node = testgres.get_new_node(name, base_dir=base_dir)
+    node.cleanup()
+
+@click.command(name='list')
+def pg_list():
+    """List existing databases"""
+    dirs = os.listdir(os.path.join(zenith_base_dir, 'pg'))
+    path={}
+    status={}
+    data=[]
+
+    for dirname in dirs:
+        path[dirname] = os.path.join(zenith_base_dir, 'pg', dirname)
+        fname = os.path.join( path[dirname], 'data/postmaster.pid')
+        try:
+            f = open(fname,'r')
+            status[dirname] = f.readlines()[-1]
+        except OSError as err:
+            status[dirname]='inactive'
+        data.append([dirname , status[dirname], path[dirname]])
+
+    print(tabulate(data, headers=['Name', 'Status', 'Path']))
+
+pg.add_command(pg_create)
+pg.add_command(pg_destroy)
+pg.add_command(pg_start)   
+pg.add_command(pg_stop)   
+pg.add_command(pg_list)
+
+
+
+@click.group()
+def storage():
+    """Storage operations"""
+
+@click.command(name='attach')
+@click.option('--name')
+def storage_attach(name):
+    """Attach the storage"""
+
+@click.command(name='detach')
+@click.option('--name')
+@click.option('--force', is_flag=True, show_default=True)
+def storage_detach(name):
+    """Detach the storage"""
+
+@click.command(name='list')
+def storage_list():
+    """List existing storages"""
+
+storage.add_command(storage_attach)
+storage.add_command(storage_detach)
+storage.add_command(storage_list)
+
+@click.group()
+def snapshot():
+    """Snapshot operations"""
+
+@click.command(name='create')
+def snapshot_create():
+    """Create new snapshot"""
+
+@click.command(name='destroy')
+def snapshot_destroy():
+    """Destroy the snapshot"""
+
+@click.command(name='pull')
+def snapshot_pull():
+    """Pull remote snapshot"""
+
+@click.command(name='push')
+def snapshot_push():
+    """Push snapshot to remote"""
+
+@click.command(name='import')
+def snapshot_import():
+    """Convert given format to zenith snapshot"""
+
+@click.command(name='export')
+def snapshot_export():
+    """Convert zenith snapshot to PostgreSQL compatible format"""
+
+snapshot.add_command(snapshot_create)
+snapshot.add_command(snapshot_destroy)
+snapshot.add_command(snapshot_pull)
+snapshot.add_command(snapshot_push)
+snapshot.add_command(snapshot_import)
+snapshot.add_command(snapshot_export)
+
+@click.group()
+def wal():
+    """WAL operations"""
+
+# NOTE(review): `name="list"` below is a default value of a function parameter,
+# not the click command name, so this command is registered under a name
+# derived from the function name. The other list commands in this file use
+# @click.command(name='list') - confirm which one is intended here.
+@click.command()
+def wallist(name="list"):
+    """List WAL files"""
+
+wal.add_command(wallist)
+
+
+@click.command()
+def console():
+    """Open web console"""
+
+main.add_command(pg)
+main.add_command(storage)
+main.add_command(snapshot)
+main.add_command(wal)
+main.add_command(console)
+
+
+if __name__ == '__main__':
+    main()
--- a/pageserver/Cargo.lock
+++ b/pageserver/Cargo.lock
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -8,10 +8,12 @@ edition = "2018"

 [dependencies]
 chrono = "0.4.19"
+crossbeam-channel = "0.5.0"
 rand = "0.8.3"
 regex = "1.4.5"
 bytes = "1.0.1"
 byteorder = "1.4.3"
+fs2 = "0.4.3"
 futures = "0.3.13"
 lazy_static = "1.4.0"
 slog-stdlog = "4.1.0"
@@ -24,24 +26,11 @@ clap = "2.33.0"
 termion = "1.5.6"
 tui = "0.14.0"
 daemonize = "0.4.1"
-rust-s3 = { version = "0.27.0-rc4", features = ["no-verify-ssl"] }
+rust-s3 = { git = "https://github.com/hlinnaka/rust-s3", features = ["no-verify-ssl"] }
 tokio = { version = "1.3.0", features = ["full"] }
 tokio-stream = { version = "0.1.4" }
-postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-rocksdb = "0.16.0"
+tokio-postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
+postgres-protocol = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
+postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
 anyhow = "1.0"
 crc32c = "0.6.0"
-walkdir = "2"
-thiserror = "1.0"
-hex = "0.4.3"
-tar = "0.4.33"
-parse_duration = "2.1.1"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1"
-fs_extra = "1.2.0"
-
-postgres_ffi = { path = "../postgres_ffi" }
-zenith_utils = { path = "../zenith_utils" }
-workspace_hack = { path = "../workspace_hack" }
--- a/pageserver/build.rs
+++ b/pageserver/build.rs
@@ -0,0 +1,41 @@
+//
+//   Triggers postgres build if there is no postgres binary present at
+// 'REPO_ROOT/tmp_install/bin/postgres'.
+//
+//   I can see a lot of disadvantages with such automation; the main
+// advantage here is the ability to build everything and run integration tests
+// in a bare repo by running 'cargo test'.
+//
+//   We could detect whether it is a debug or a release build and run the
+// corresponding pg build. But it seems like overkill for now.
+//
+// Problem #1 -- the language server in my editor likes calling 'cargo build'
+// by itself. So if I delete the tmp_install directory, it magically reappears
+// after some time. During this compilation 'cargo build' may complain about
+// "waiting for file lock on build directory".
+//
+// Problem #2 -- cargo build would run this only if something is changed in
+// the crate.
+//
+//   And generally speaking, postgres is not a build dependency of the pageserver,
+// just of the integration tests. So let's not mix the two. I'll leave this file in
+// place for some time in case anybody starts doing the same.
+//
+
+// use std::path::Path;
+// use std::process::{Command};
+
+fn main() {
+    // // build postgres if that has not been done yet
+    // if !Path::new("../tmp_install/bin/postgres").exists() {
+    //     let make_res = Command::new("make")
+    //         .arg("postgres")
+    //         .env_clear()
+    //         .status()
+    //         .expect("failed to execute 'make postgres'");
+
+    //     if !make_res.success() {
+    //         panic!("postgres build failed");
+    //     }
+    // }
+}
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -1,399 +0,0 @@
-use crate::ZTimelineId;
-use log::*;
-use std::io::Write;
-use std::sync::Arc;
-use std::time::SystemTime;
-use tar::{Builder, Header};
-use walkdir::WalkDir;
-use bytes::{BufMut, BytesMut};
-
-use crate::repository::{BufferTag, RelTag, Timeline};
-use postgres_ffi::relfile_utils::*;
-use postgres_ffi::*;
-use zenith_utils::lsn::Lsn;
-
-fn new_tar_header(path: &str, size: u64) -> anyhow::Result<Header> {
-    let mut header = Header::new_gnu();
-    header.set_size(size);
-    header.set_path(path)?;
-    header.set_mode(0b110000000);
-    header.set_mtime(
-        SystemTime::now()
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .unwrap()
-            .as_secs(),
-    );
-    header.set_cksum();
-    Ok(header)
-}
-
-//
-// Generate SRLU segment files from repository
-//
-fn add_slru_segments(
-    ar: &mut Builder<&mut dyn Write>,
-    timeline: &Arc<dyn Timeline>,
-    path: &str,
-    forknum: u8,
-    lsn: Lsn,
-) -> anyhow::Result<()> {
-    let rel = RelTag {
-        spcnode: 0,
-        dbnode: 0,
-        relnode: 0,
-        forknum,
-    };
-    let (first, last) = timeline.get_range(rel, lsn)?;
-    const SEG_SIZE: usize =
-        pg_constants::BLCKSZ as usize * pg_constants::SLRU_PAGES_PER_SEGMENT as usize;
-    let mut seg_buf = [0u8; SEG_SIZE];
-    let mut curr_segno: Option<u32> = None;
-    for page in first..last {
-        let tag = BufferTag { rel, blknum: page };
-        let img = timeline.get_page_at_lsn(tag, lsn)?;
-        // Zero length image indicates truncated segment: just skip it
-        if img.len() != 0 {
-            assert!(img.len() == pg_constants::BLCKSZ as usize);
-
-            let segno = page / pg_constants::SLRU_PAGES_PER_SEGMENT;
-            if curr_segno.is_some() && curr_segno.unwrap() != segno {
-                let segname = format!("{}/{:>04X}", path, curr_segno.unwrap());
-                let header = new_tar_header(&segname, SEG_SIZE as u64)?;
-                ar.append(&header, &seg_buf[..])?;
-                seg_buf = [0u8; SEG_SIZE];
-            }
-            curr_segno = Some(segno);
-            let offs_start = (page % pg_constants::SLRU_PAGES_PER_SEGMENT) as usize
-                * pg_constants::BLCKSZ as usize;
-            let offs_end = offs_start + pg_constants::BLCKSZ as usize;
-            seg_buf[offs_start..offs_end].copy_from_slice(&img);
-        }
-    }
-    if curr_segno.is_some() {
-        let segname = format!("{}/{:>04X}", path, curr_segno.unwrap());
-        let header = new_tar_header(&segname, SEG_SIZE as u64)?;
-        ar.append(&header, &seg_buf[..])?;
-    }
-    Ok(())
-}
-
-//
-// Extract pg_filenode.map files from repository
-//
-fn add_relmap_files(
-    ar: &mut Builder<&mut dyn Write>,
-    timeline: &Arc<dyn Timeline>,
-    lsn: Lsn,
-    snappath: &str,
-) -> anyhow::Result<()> {
-    for db in timeline.get_databases(lsn)?.iter() {
-        let tag = BufferTag {
-            rel: *db,
-            blknum: 0,
-        };
-        let img = timeline.get_page_at_lsn(tag, lsn)?;
-        let path = if db.spcnode == pg_constants::GLOBALTABLESPACE_OID {
-            String::from("global/pg_filenode.map")
-        } else {
-            // User defined tablespaces are not supported
-            assert!(db.spcnode == pg_constants::DEFAULTTABLESPACE_OID);
-            let src_path = format!("{}/base/1/PG_VERSION", snappath);
-            let dst_path = format!("base/{}/PG_VERSION", db.dbnode);
-            ar.append_path_with_name(&src_path, &dst_path)?;
-            format!("base/{}/pg_filenode.map", db.dbnode)
-        };
-        assert!(img.len() == 512);
-        let header = new_tar_header(&path, img.len() as u64)?;
-        ar.append(&header, &img[..])?;
-    }
-    Ok(())
-}
-
-//
-// Extract twophase state files
-//
-fn add_twophase_files(
-    ar: &mut Builder<&mut dyn Write>,
-    timeline: &Arc<dyn Timeline>,
-    lsn: Lsn,
-) -> anyhow::Result<()> {
-    for xid in timeline.get_twophase(lsn)?.iter() {
-        let tag = BufferTag {
-            rel: RelTag {
-                spcnode: 0,
-                dbnode: 0,
-                relnode: 0,
-                forknum: pg_constants::PG_TWOPHASE_FORKNUM,
-            },
-            blknum: *xid,
-        };
-        let img = timeline.get_page_at_lsn(tag, lsn)?;
-		let mut buf = BytesMut::new();
-		buf.extend_from_slice(&img[..]);
-		let crc = crc32c::crc32c(&img[..]);
-		buf.put_u32_le(crc);
-        let path = format!("pg_twophase/{:>08X}", xid);
-        let header = new_tar_header(&path, buf.len() as u64)?;
-        ar.append(&header, &buf[..])?;
-    }
-    Ok(())
-}
-
-//
-// Add generated pg_control file
-//
-fn add_pgcontrol_file(
-    ar: &mut Builder<&mut dyn Write>,
-    timeline: &Arc<dyn Timeline>,
-    lsn: Lsn,
-) -> anyhow::Result<()> {
-    if let Some(checkpoint_bytes) =
-        timeline.get_page_image(BufferTag::fork(pg_constants::PG_CHECKPOINT_FORKNUM), Lsn(0))?
-    {
-        if let Some(pg_control_bytes) = timeline.get_page_image(
-            BufferTag::fork(pg_constants::PG_CONTROLFILE_FORKNUM),
-            Lsn(0),
-        )? {
-            let mut pg_control = postgres_ffi::decode_pg_control(pg_control_bytes)?;
-            let mut checkpoint = postgres_ffi::decode_checkpoint(checkpoint_bytes)?;
-
-            checkpoint.redo = lsn.0;
-            checkpoint.nextXid.value += 1;
-            // TODO: When we restart master there are no active transaction and oldestXid is
-            // equal to nextXid if there are no prepared transactions.
-            // Let's ignore them for a while...
-            checkpoint.oldestXid = checkpoint.nextXid.value as u32;
-            pg_control.checkPointCopy = checkpoint;
-            let pg_control_bytes = postgres_ffi::encode_pg_control(pg_control);
-            let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
-            ar.append(&header, &pg_control_bytes[..])?;
-        }
-    }
-    Ok(())
-}
-
-///
-/// Generate tarball with non-relational files from repository
-///
-pub fn send_tarball_at_lsn(
-    write: &mut dyn Write,
-    timelineid: ZTimelineId,
-    timeline: &Arc<dyn Timeline>,
-    lsn: Lsn,
-    snapshot_lsn: Lsn,
-) -> anyhow::Result<()> {
-    let mut ar = Builder::new(write);
-
-    let snappath = format!("timelines/{}/snapshots/{:016X}", timelineid, snapshot_lsn.0);
-
-    debug!("sending tarball of snapshot in {}", snappath);
-    for entry in WalkDir::new(&snappath) {
-        let entry = entry?;
-        let fullpath = entry.path();
-        let relpath = entry.path().strip_prefix(&snappath).unwrap();
-
-        if relpath.to_str().unwrap() == "" {
-            continue;
-        }
-
-        if entry.file_type().is_dir() {
-            trace!(
-                "sending dir {} as {}",
-                fullpath.display(),
-                relpath.display()
-            );
-            ar.append_dir(relpath, fullpath)?;
-        } else if entry.file_type().is_symlink() {
-            error!("ignoring symlink in snapshot dir");
-        } else if entry.file_type().is_file() {
-            // Shared catalogs are exempt
-            if relpath.starts_with("global/") {
-                trace!("sending shared catalog {}", relpath.display());
-                ar.append_path_with_name(fullpath, relpath)?;
-            } else if !is_rel_file_path(relpath.to_str().unwrap()) {
-                if entry.file_name() != "pg_filenode.map"
-                    && entry.file_name() != "pg_control"
-                    && !relpath.starts_with("pg_xact/")
-                    && !relpath.starts_with("pg_multixact/")
-                {
-                    trace!("sending {}", relpath.display());
-                    ar.append_path_with_name(fullpath, relpath)?;
-                }
-            } else {
-                trace!("not sending {}", relpath.display());
-            }
-        } else {
-            error!("unknown file type: {}", fullpath.display());
-        }
-    }
-
-    add_slru_segments(
-        &mut ar,
-        timeline,
-        "pg_xact",
-        pg_constants::PG_XACT_FORKNUM,
-        lsn,
-    )?;
-    add_slru_segments(
-        &mut ar,
-        timeline,
-        "pg_multixact/members",
-        pg_constants::PG_MXACT_MEMBERS_FORKNUM,
-        lsn,
-    )?;
-    add_slru_segments(
-        &mut ar,
-        timeline,
-        "pg_multixact/offsets",
-        pg_constants::PG_MXACT_OFFSETS_FORKNUM,
-        lsn,
-    )?;
-    add_relmap_files(&mut ar, timeline, lsn, &snappath)?;
-    add_twophase_files(&mut ar, timeline, lsn)?;
-    add_pgcontrol_file(&mut ar, timeline, lsn)?;
-
-    ar.finish()?;
-    debug!("all tarred up!");
-    Ok(())
-}
-
-///
-/// Send a tarball containing a snapshot of all non-relation files in the
-/// PostgreSQL data directory, at given LSN
-///
-/// There must be a snapshot at the given LSN in the snapshots directory, we cannot
-/// reconstruct the state at an arbitrary LSN at the moment.
-///
-pub fn send_snapshot_tarball(
-    write: &mut dyn Write,
-    timelineid: ZTimelineId,
-    snapshotlsn: Lsn,
-) -> Result<(), std::io::Error> {
-    let mut ar = Builder::new(write);
-
-    let snappath = format!("timelines/{}/snapshots/{:016X}", timelineid, snapshotlsn.0);
-    let walpath = format!("timelines/{}/wal", timelineid);
-
-    debug!("sending tarball of snapshot in {}", snappath);
-    //ar.append_dir_all("", &snappath)?;
-
-    for entry in WalkDir::new(&snappath) {
-        let entry = entry?;
-        let fullpath = entry.path();
-        let relpath = entry.path().strip_prefix(&snappath).unwrap();
-
-        if relpath.to_str().unwrap() == "" {
-            continue;
-        }
-
-        if entry.file_type().is_dir() {
-            trace!(
-                "sending dir {} as {}",
-                fullpath.display(),
-                relpath.display()
-            );
-            ar.append_dir(relpath, fullpath)?;
-        } else if entry.file_type().is_symlink() {
-            error!("ignoring symlink in snapshot dir");
-        } else if entry.file_type().is_file() {
-            // Shared catalogs are exempt
-            if relpath.starts_with("global/") {
-                trace!("sending shared catalog {}", relpath.display());
-                ar.append_path_with_name(fullpath, relpath)?;
-            } else if !is_rel_file_path(relpath.to_str().unwrap()) {
-                trace!("sending {}", relpath.display());
-                ar.append_path_with_name(fullpath, relpath)?;
-            } else {
-                trace!("not sending {}", relpath.display());
-
-                // FIXME: For now, also send all the relation files.
-                // This really shouldn't be necessary, and kind of
-                // defeats the point of having a page server in the
-                // first place. But it is useful at least when
-                // debugging with the DEBUG_COMPARE_LOCAL option (see
-                // vendor/postgres/src/backend/storage/smgr/pagestore_smgr.c)
-
-                ar.append_path_with_name(fullpath, relpath)?;
-            }
-        } else {
-            error!("unknown file type: {}", fullpath.display());
-        }
-    }
-
-    // FIXME: Also send all the WAL. The compute node would only need
-    // the WAL that applies to non-relation files, because the page
-    // server handles all the relation files. But we don't have a
-    // mechanism for separating relation and non-relation WAL at the
-    // moment.
-    for entry in std::fs::read_dir(&walpath)? {
-        let entry = entry?;
-        let fullpath = &entry.path();
-        let relpath = fullpath.strip_prefix(&walpath).unwrap();
-
-        if !entry.path().is_file() {
-            continue;
-        }
-
-        let archive_fname = relpath.to_str().unwrap();
-        let archive_fname = archive_fname
-            .strip_suffix(".partial")
-            .unwrap_or(&archive_fname);
-        let archive_path = "pg_wal/".to_owned() + archive_fname;
-        ar.append_path_with_name(fullpath, archive_path)?;
-    }
-
-    ar.finish()?;
-    debug!("all tarred up!");
-    Ok(())
-}
-
-///
-/// Parse a path, relative to the root of PostgreSQL data directory, as
-/// a PostgreSQL relation data file.
-///
-fn parse_rel_file_path(path: &str) -> Result<(), FilePathError> {
-    /*
-     * Relation data files can be in one of the following directories:
-     *
-     * global/
-     *		shared relations
-     *
-     * base/<db oid>/
-     *		regular relations, default tablespace
-     *
-     * pg_tblspc/<tblspc oid>/<tblspc version>/
-     *		within a non-default tablespace (the name of the directory
-     *		depends on version)
-     *
-     * And the relation data files themselves have a filename like:
-     *
-     * <oid>.<segment number>
-     */
-    if let Some(fname) = path.strip_prefix("global/") {
-        let (_relnode, _forknum, _segno) = parse_relfilename(fname)?;
-
-        Ok(())
-    } else if let Some(dbpath) = path.strip_prefix("base/") {
-        let mut s = dbpath.split('/');
-        let dbnode_str = s.next().ok_or(FilePathError::InvalidFileName)?;
-        let _dbnode = dbnode_str.parse::<u32>()?;
-        let fname = s.next().ok_or(FilePathError::InvalidFileName)?;
-        if s.next().is_some() {
-            return Err(FilePathError::InvalidFileName);
-        };
-
-        let (_relnode, _forknum, _segno) = parse_relfilename(fname)?;
-
-        Ok(())
-    } else if let Some(_) = path.strip_prefix("pg_tblspc/") {
-        // TODO
-        error!("tablespaces not implemented yet");
-        Err(FilePathError::InvalidFileName)
-    } else {
-        Err(FilePathError::InvalidFileName)
-    }
-}
-
-fn is_rel_file_path(path: &str) -> bool {
-    parse_rel_file_path(path).is_ok()
-}
--- a/pageserver/src/bin/cli/main.rs
+++ b/pageserver/src/bin/cli/main.rs
@@ -0,0 +1,43 @@
+use anyhow::Result;
+use clap::{App, AppSettings};
+
+pub mod pg;
+pub mod snapshot;
+pub mod storage;
+mod subcommand;
+
+/// Entry point of the `zenith` command-line tool.
+///
+/// Registers the `pg`, `storage` and `snapshot` subcommands with clap, parses
+/// the command line and forwards the matched arguments to the selected
+/// subcommand. Any error returned by the subcommand is propagated.
+fn main() -> Result<()> {
+    // NOTE: the dispatch at the end of this function indexes into this
+    // vector, so the order here must stay in sync with those indices.
+    let cli_commands = subcommand::ClapCommands {
+        commands: vec![
+            Box::new(pg::PgCmd {
+                clap_cmd: clap::SubCommand::with_name("pg"),
+            }),
+            Box::new(storage::StorageCmd {
+                clap_cmd: clap::SubCommand::with_name("storage"),
+            }),
+            Box::new(snapshot::SnapshotCmd {
+                clap_cmd: clap::SubCommand::with_name("snapshot"),
+            }),
+        ],
+    };
+
+    let matches = App::new("zenith")
+        .about("Zenith CLI")
+        .version("1.0")
+        .setting(AppSettings::SubcommandRequiredElseHelp)
+        .subcommands(cli_commands.generate())
+        .get_matches();
+
+    if let Some(subcommand) = matches.subcommand_name() {
+        // The program is called "zenith" (see App::new above), not "git".
+        println!("'zenith {}' was used", subcommand);
+    }
+
+    match matches.subcommand() {
+        ("pg", Some(sub_args)) => cli_commands.commands[0].run(sub_args.clone())?,
+        ("storage", Some(sub_args)) => cli_commands.commands[1].run(sub_args.clone())?,
+        ("snapshot", Some(sub_args)) => cli_commands.commands[2].run(sub_args.clone())?,
+        ("", None) => println!("No subcommand"),
+        _ => unreachable!(),
+    }
+    Ok(())
+}
--- a/pageserver/src/bin/cli/pg.rs
+++ b/pageserver/src/bin/cli/pg.rs
@@ -0,0 +1,105 @@
+use anyhow::Result;
+use clap::{App, AppSettings, Arg};
+
+use crate::subcommand;
+
+/// The `pg` subcommand: operations on compute nodes.
+pub struct PgCmd<'a> {
+    /// Base clap definition that `gen_clap_command` extends.
+    pub clap_cmd: clap::App<'a, 'a>,
+}
+
+impl subcommand::SubCommand for PgCmd<'_> {
+    /// Builds the clap definition of `pg` with its `list`, `create`,
+    /// `destroy`, `start`, `stop` and `show` subcommands.
+    fn gen_clap_command(&self) -> clap::App {
+        // Every per-node subcommand takes the same `-n`/`--name` option, so
+        // it is built in one place.
+        let name_arg = || {
+            Arg::with_name("name")
+                .short("n")
+                .long("name")
+                .takes_value(true)
+                .help("Name of the compute node")
+        };
+
+        let list = App::new("list").about("List existing compute nodes");
+
+        let create = App::new("create")
+            .about("Create (init) new data directory using given storage and start postgres")
+            .arg(name_arg())
+            .arg(
+                Arg::with_name("storage")
+                    .short("s")
+                    .long("storage")
+                    .takes_value(true)
+                    .help("Name of the storage node to use"),
+            )
+            //TODO should it be just name of uploaded snapshot or some path?
+            .arg(
+                Arg::with_name("snapshot")
+                    .long("snapshot")
+                    .takes_value(true)
+                    .help("Name of the snapshot to use"),
+            )
+            .arg(
+                Arg::with_name("nostart")
+                    .long("no-start")
+                    .takes_value(false)
+                    .help("Don't start postgres on the created node"),
+            );
+
+        let destroy = App::new("destroy")
+            .about("Stop postgres and destroy node's data directory")
+            .arg(name_arg());
+
+        let start = App::new("start")
+            .about("Start postgres on the given node")
+            .arg(name_arg())
+            .arg(
+                Arg::with_name("replica")
+                    .long("replica")
+                    .takes_value(false)
+                    .help("Start the compute node as replica"),
+            );
+
+        let stop = App::new("stop")
+            .about("Stop postgres on the given node")
+            .arg(name_arg());
+
+        let show = App::new("show")
+            .about("Show info about the given node")
+            .arg(name_arg());
+
+        self.clap_cmd
+            .clone()
+            .about("Operations with zenith compute nodes")
+            .setting(AppSettings::SubcommandRequiredElseHelp)
+            .subcommand(list)
+            .subcommand(create)
+            .subcommand(destroy)
+            .subcommand(start)
+            .subcommand(stop)
+            .subcommand(show)
+    }
+
+    /// Placeholder implementation: only prints the matched arguments.
+    fn run(&self, args: clap::ArgMatches) -> Result<()> {
+        println!("Run PgCmd with args {:?}", args);
+        Ok(())
+    }
+}
--- a/pageserver/src/bin/cli/snapshot.rs
+++ b/pageserver/src/bin/cli/snapshot.rs
@@ -0,0 +1,40 @@
+use anyhow::Result;
+use clap::{App, AppSettings, Arg};
+
+use crate::subcommand;
+
+/// Command-line front end for the snapshot command group.
+///
+/// `clap_cmd` is the base clap application that `gen_clap_command` clones
+/// and decorates with the snapshot subcommands.
+pub struct SnapshotCmd<'a> {
+    pub clap_cmd: clap::App<'a, 'a>,
+}
+
+impl subcommand::SubCommand for SnapshotCmd<'_> {
+    /// Returns a copy of `clap_cmd` with the description, the
+    /// "subcommand required, else print help" setting, and the `list`,
+    /// `create <pgdata>`, `destroy`, `start`, `stop` and `show` subcommands.
+    fn gen_clap_command(&self) -> clap::App {
+        // `create` is the only subcommand that takes an argument.
+        let create = App::new("create").arg(Arg::with_name("pgdata").required(true));
+        let base = self
+            .clap_cmd
+            .clone()
+            .about("Operations with zenith snapshots")
+            .setting(AppSettings::SubcommandRequiredElseHelp)
+            .subcommand(App::new("list"))
+            .subcommand(create);
+
+        // The remaining subcommands are bare names, registered in this order.
+        ["destroy", "start", "stop", "show"]
+            .iter()
+            .fold(base, |app, name| app.subcommand(App::new(*name)))
+    }
+
+    /// Stub: prints the parsed arguments and reports success.
+    fn run(&self, args: clap::ArgMatches) -> Result<()> {
+        println!("Run SnapshotCmd with args {:?}", args);
+        Ok(())
+    }
+}
--- a/pageserver/src/bin/cli/storage.rs
+++ b/pageserver/src/bin/cli/storage.rs
@@ -0,0 +1,36 @@
+use anyhow::Result;
+use clap::{App, AppSettings};
+
+use crate::subcommand;
+
+/// Command-line front end for the storage-node command group.
+///
+/// `clap_cmd` is the base clap application that `gen_clap_command` clones
+/// and decorates with the storage subcommands.
+pub struct StorageCmd<'a> {
+    pub clap_cmd: clap::App<'a, 'a>,
+}
+
+impl subcommand::SubCommand for StorageCmd<'_> {
+    /// Returns a copy of `clap_cmd` with the description, the
+    /// "subcommand required, else print help" setting, and the `list`,
+    /// `attach`, `detach` and `show` subcommands.
+    fn gen_clap_command(&self) -> clap::App {
+        let base = self
+            .clap_cmd
+            .clone()
+            .about("Operations with zenith storage nodes")
+            .setting(AppSettings::SubcommandRequiredElseHelp);
+
+        // All subcommands are bare names; register them in this order.
+        ["list", "attach", "detach", "show"]
+            .iter()
+            .fold(base, |app, name| app.subcommand(App::new(*name)))
+    }
+
+    /// Stub: prints the parsed arguments and reports success.
+    fn run(&self, args: clap::ArgMatches) -> Result<()> {
+        println!("Run StorageCmd with args {:?}", args);
+        Ok(())
+    }
+}
--- a/pageserver/src/bin/cli/subcommand.rs
+++ b/pageserver/src/bin/cli/subcommand.rs
@@ -0,0 +1,27 @@
+use anyhow::Result;
+
+/// Interface every CLI subcommand implements.
+pub trait SubCommand {
+    /// Builds the clap definition describing this subcommand.
+    fn gen_clap_command(&self) -> clap::App;
+
+    /// Executes the subcommand with the parsed clap argument matches.
+    fn run(&self, args: clap::ArgMatches) -> Result<()>;
+}
+
+/// Owns the registered subcommands as boxed `SubCommand` trait objects, so
+/// implementations of different concrete types can live in one list.
+pub struct ClapCommands {
+    pub commands: Vec<Box<dyn SubCommand>>,
+}
+
+impl ClapCommands {
+    /// Collects one `clap::App` per registered command, in registration
+    /// order, ready to hand to clap's `.subcommands()`.
+    pub fn generate(&self) -> Vec<clap::App> {
+        self.commands
+            .iter()
+            .map(|command| command.gen_clap_command())
+            .collect()
+    }
+}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -3,114 +3,73 @@
 //

 use log::*;
-use parse_duration::parse;
+use std::fs;
 use std::io;
-use std::process::exit;
+use std::path::PathBuf;
 use std::thread;
-use std::time::Duration;
-use std::{env, path::PathBuf};
-use std::{
-    fs::{File, OpenOptions},
-    net::TcpListener,
-};
+use std::{fs::File, fs::OpenOptions, str::FromStr};

-use anyhow::{Context, Result};
 use clap::{App, Arg};
 use daemonize::Daemonize;

-use slog::{Drain, FnValue};
+use slog;
+use slog::Drain;
+use slog_scope;
+use slog_stdlog;

-use pageserver::{branches, page_cache, page_service, tui, PageServerConf};
+use pageserver::page_service;
+use pageserver::restore_s3;
+use pageserver::tui;
+use pageserver::walreceiver;
+use pageserver::PageServerConf;

-const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
-const DEFAULT_GC_PERIOD_SEC: u64 = 10;
-//const DEFAULT_GC_HORIZON: u64 = 1024 * 1024 * 1024;
-//const DEFAULT_GC_PERIOD_SEC: u64 = 600;
-
-fn main() -> Result<()> {
+fn main() -> Result<(), io::Error> {
    let arg_matches = App::new("Zenith page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
-        .arg(
-            Arg::with_name("listen")
-                .short("l")
-                .long("listen")
-                .takes_value(true)
-                .help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"),
-        )
-        .arg(
-            Arg::with_name("interactive")
-                .short("i")
-                .long("interactive")
-                .takes_value(false)
-                .help("Interactive mode"),
-        )
-        .arg(
-            Arg::with_name("daemonize")
-                .short("d")
-                .long("daemonize")
-                .takes_value(false)
-                .help("Run in the background"),
-        )
-        .arg(
-            Arg::with_name("init")
-                .long("init")
-                .takes_value(false)
-                .help("Initialize pageserver repo"),
-        )
-        .arg(
-            Arg::with_name("gc_horizon")
-                .long("gc_horizon")
-                .takes_value(true)
-                .help("Distance from current LSN to perform all wal records cleanup"),
-        )
-        .arg(
-            Arg::with_name("gc_period")
-                .long("gc_period")
-                .takes_value(true)
-                .help("Interval between garbage collector iterations"),
-        )
-        .arg(
-            Arg::with_name("workdir")
-                .short("D")
-                .long("workdir")
-                .takes_value(true)
-                .help("Working directory for the pageserver"),
-        )
+        .arg(Arg::with_name("datadir")
+                 .short("D")
+                 .long("dir")
+                 .takes_value(true)
+                 .help("Path to the page server data directory"))
+        .arg(Arg::with_name("wal_producer")
+                 .short("w")
+                 .long("wal-producer")
+                 .takes_value(true)
+                 .help("connect to the WAL sender (postgres or wal_acceptor) on connstr (default: 'host=127.0.0.1 port=65432 user=zenith')"))
+        .arg(Arg::with_name("listen")
+                 .short("l")
+                 .long("listen")
+                 .takes_value(true)
+                 .help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"))
+        .arg(Arg::with_name("interactive")
+                 .short("i")
+                 .long("interactive")
+                 .takes_value(false)
+                 .help("Interactive mode"))
+        .arg(Arg::with_name("daemonize")
+                 .short("d")
+                 .long("daemonize")
+                 .takes_value(false)
+                 .help("Run in the background"))
+        .arg(Arg::with_name("skip_recovery")
+                 .long("skip-recovery")
+                 .takes_value(false)
+                 .help("Skip S3 recovery procedy and start empty"))
        .get_matches();

-    let workdir = if let Some(workdir_arg) = arg_matches.value_of("workdir") {
-        PathBuf::from(workdir_arg)
-    } else if let Some(workdir_arg) = std::env::var_os("ZENITH_REPO_DIR") {
-        PathBuf::from(workdir_arg.to_str().unwrap())
-    } else {
-        PathBuf::from(".zenith")
-    };
-
-    let pg_distrib_dir: PathBuf = {
-        if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
-            postgres_bin.into()
-        } else {
-            let cwd = env::current_dir()?;
-            cwd.join("tmp_install")
-        }
-    };
-
-    if !pg_distrib_dir.join("bin/postgres").exists() {
-        anyhow::bail!("Can't find postgres binary at {:?}", pg_distrib_dir);
-    }
-
    let mut conf = PageServerConf {
+        data_dir: PathBuf::from("./"),
        daemonize: false,
        interactive: false,
-        gc_horizon: DEFAULT_GC_HORIZON,
-        gc_period: Duration::from_secs(DEFAULT_GC_PERIOD_SEC),
-        listen_addr: "127.0.0.1:64000".parse().unwrap(),
-        // we will change the current working directory to the repository below,
-        // so always set 'workdir' to '.'
-        workdir: PathBuf::from("."),
-        pg_distrib_dir,
+        wal_producer_connstr: None,
+        listen_addr: "127.0.0.1:5430".parse().unwrap(),
+        skip_recovery: false,
    };

+    if let Some(dir) = arg_matches.value_of("datadir") {
+        conf.data_dir = PathBuf::from(dir);
+    }
+
    if arg_matches.is_present("daemonize") {
        conf.daemonize = true;
    }
@@ -120,84 +79,70 @@ fn main() -> Result<()> {
    }

    if conf.daemonize && conf.interactive {
-        eprintln!("--daemonize is not allowed with --interactive: choose one");
-        exit(1);
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidInput,
+            "--daemonize is not allowed with --interactive: choose one",
+        ));
+    }
+
+    if arg_matches.is_present("skip_recovery") {
+        conf.skip_recovery = true;
+    }
+
+    if let Some(addr) = arg_matches.value_of("wal_producer") {
+        conf.wal_producer_connstr = Some(String::from_str(addr).unwrap());
    }

    if let Some(addr) = arg_matches.value_of("listen") {
-        conf.listen_addr = addr.parse()?;
+        conf.listen_addr = addr.parse().unwrap();
    }

-    if let Some(horizon) = arg_matches.value_of("gc_horizon") {
-        conf.gc_horizon = horizon.parse()?;
-    }
-
-    if let Some(period) = arg_matches.value_of("gc_period") {
-        conf.gc_period = parse(period)?;
-    }
-
-    // The configuration is all set up now. Turn it into a 'static
-    // that can be freely stored in structs and passed across threads
-    // as a ref.
-    let conf: &'static PageServerConf = Box::leak(Box::new(conf));
-
-    // Create repo and exit if init was requested
-    if arg_matches.is_present("init") {
-        branches::init_repo(conf, &workdir)?;
-        return Ok(());
-    }
-
-    // Set CWD to workdir for non-daemon modes
-    env::set_current_dir(&workdir)?;
-
    start_pageserver(conf)
 }

-fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
-    let log_filename = "pageserver.log";
-    // Don't open the same file for output multiple times;
-    // the different fds could overwrite each other's output.
-    let log_file = OpenOptions::new()
-        .create(true)
-        .append(true)
-        .open(&log_filename)
-        .with_context(|| format!("failed to open {:?}", &log_filename))?;
-
+fn start_pageserver(conf: PageServerConf) -> Result<(), io::Error> {
    // Initialize logger
-    let logger_file = log_file.try_clone().unwrap();
-    let _scope_guard = init_logging(&conf, logger_file)?;
-    let _log_guard = slog_stdlog::init()?;
+    let _scope_guard = init_logging(&conf);
+    let _log_guard = slog_stdlog::init().unwrap();

    // Note: this `info!(...)` macro comes from `log` crate
    info!("standard logging redirected to slog");

-    let tui_thread = if conf.interactive {
+    let tui_thread: Option<thread::JoinHandle<()>>;
+    if conf.interactive {
        // Initialize the UI
-        Some(
+        tui_thread = Some(
            thread::Builder::new()
                .name("UI thread".into())
                .spawn(|| {
                    let _ = tui::ui_main();
                })
                .unwrap(),
-        )
+        );
+        //threads.push(tui_thread);
    } else {
-        None
-    };
-
-    // TODO: Check that it looks like a valid repository before going further
+        tui_thread = None;
+    }

    if conf.daemonize {
        info!("daemonizing...");

        // There should'n be any logging to stdin/stdout. Redirect it to the main log so
-        // that we will see any accidental manual fprintf's or backtraces.
-        let stdout = log_file.try_clone().unwrap();
-        let stderr = log_file;
+        // that we will see any accidental manual fprintf's or backtraces.
+        let stdout = OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(conf.data_dir.join("pageserver.log"))
+            .unwrap();
+        let stderr = OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(conf.data_dir.join("pageserver.log"))
+            .unwrap();

        let daemonize = Daemonize::new()
-            .pid_file("pageserver.pid")
-            .working_directory(".")
+            .pid_file(conf.data_dir.join("pageserver.pid"))
+            .working_directory(conf.data_dir.clone())
            .stdout(stdout)
            .stderr(stderr);

@@ -207,62 +152,89 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
        }
    }

-    // Check that we can bind to address before further initialization
-    info!("Starting pageserver on {}", conf.listen_addr);
-    let pageserver_listener = TcpListener::bind(conf.listen_addr)?;
+    let mut threads = Vec::new();

-    // Initialize page cache, this will spawn walredo_thread
-    page_cache::init(conf);
+    info!("starting...");

-    // Spawn a thread to listen for connections. It will spawn further threads
-    // for each connection.
-    let page_service_thread = thread::Builder::new()
-        .name("Page Service thread".into())
-        .spawn(move || page_service::thread_main(conf, pageserver_listener))?;
-
-    if let Some(tui_thread) = tui_thread {
-        // The TUI thread exits when the user asks to Quit.
-        tui_thread.join().unwrap();
-    } else {
-        page_service_thread
-            .join()
-            .expect("Page service thread has panicked")?
+    // Before opening up for connections, restore the latest base backup from S3.
+    // (We don't persist anything to local disk at the moment, so we need to do
+    // this at every startup)
+    // TODO move it to a separate function
+    if !conf.skip_recovery {
+        restore_s3::restore_main(&conf);
    }

+    // Create directory for wal-redo datadirs
+    match fs::create_dir(conf.data_dir.join("wal-redo")) {
+        Ok(_) => {}
+        Err(e) => match e.kind() {
+            io::ErrorKind::AlreadyExists => {}
+            _ => {
+                panic!("Failed to create wal-redo data directory: {}", e);
+            }
+        },
+    }
+
+    // Launch the WAL receiver thread if pageserver was started with --wal-producer
+    // option. It will try to connect to the WAL safekeeper, and stream the WAL. If
+    // the connection is lost, it will reconnect on its own. We just fire and forget
+    // it here.
+    //
+    // All other wal receivers are started on demand by "callmemaybe" command
+    // sent to pageserver.
+    let conf_copy = conf.clone();
+    if let Some(wal_producer) = conf.wal_producer_connstr {
+        let conf = conf_copy.clone();
+        let walreceiver_thread = thread::Builder::new()
+            .name("static WAL receiver thread".into())
+            .spawn(move || {
+                walreceiver::thread_main(conf, &wal_producer);
+            })
+            .unwrap();
+        threads.push(walreceiver_thread);
+    }
+
+    // GetPage@LSN requests are served by another thread. (It uses async I/O,
+    // but the code in page_service sets up its own thread pool for that)
+    let conf = conf_copy.clone();
+    let page_server_thread = thread::Builder::new()
+        .name("Page Service thread".into())
+        .spawn(|| {
+            // thread code
+            page_service::thread_main(conf);
+        })
+        .unwrap();
+    threads.push(page_server_thread);
+
+    if tui_thread.is_some() {
+        // The TUI thread exits when the user asks to Quit.
+        tui_thread.unwrap().join().unwrap();
+    } else {
+        // In non-interactive mode, wait forever.
+        for t in threads {
+            t.join().unwrap()
+        }
+    }
    Ok(())
 }

-fn init_logging(
-    conf: &PageServerConf,
-    log_file: File,
-) -> Result<slog_scope::GlobalLoggerGuard, io::Error> {
+fn init_logging(conf: &PageServerConf) -> slog_scope::GlobalLoggerGuard {
    if conf.interactive {
-        Ok(tui::init_logging())
+        tui::init_logging()
    } else if conf.daemonize {
+        let log = conf.data_dir.join("pageserver.log");
+        let log_file = File::create(log).unwrap_or_else(|_| panic!("Could not create log file"));
        let decorator = slog_term::PlainSyncDecorator::new(log_file);
-        let drain = slog_term::FullFormat::new(decorator).build();
+        let drain = slog_term::CompactFormat::new(decorator).build();
        let drain = slog::Filter::new(drain, |record: &slog::Record| {
            if record.level().is_at_least(slog::Level::Info) {
                return true;
            }
-            false
+            return false;
        });
        let drain = std::sync::Mutex::new(drain).fuse();
-        let logger = slog::Logger::root(
-            drain,
-            slog::o!(
-                "location" =>
-                FnValue(move |record| {
-                    format!("{}, {}:{}",
-                            record.module(),
-                            record.file(),
-                            record.line()
-                            )
-                    }
-                )
-            ),
-        );
-        Ok(slog_scope::set_global_logger(logger))
+        let logger = slog::Logger::root(drain, slog::o!());
+        slog_scope::set_global_logger(logger)
    } else {
        let decorator = slog_term::TermDecorator::new().build();
        let drain = slog_term::FullFormat::new(decorator).build().fuse();
@@ -276,10 +248,10 @@ fn init_logging(
            {
                return true;
            }
-            false
+            return false;
        })
        .fuse();
        let logger = slog::Logger::root(drain, slog::o!());
-        Ok(slog_scope::set_global_logger(logger))
+        slog_scope::set_global_logger(logger)
    }
 }
--- a/pageserver/src/branches.rs
+++ b/pageserver/src/branches.rs
@@ -1,409 +0,0 @@
-//
-// Branch management code
-//
-// TODO: move all paths construction to conf impl
-//
-
-use anyhow::{anyhow, bail, Context, Result};
-use bytes::Bytes;
-use fs::File;
-use fs_extra;
-use postgres_ffi::{pg_constants, xlog_utils};
-use rand::Rng;
-use serde::{Deserialize, Serialize};
-use std::env;
-use std::io::{Read, Write};
-use std::{
-    collections::HashMap,
-    fs, io,
-    path::{Path, PathBuf},
-    process::{Command, Stdio},
-    str::FromStr,
-};
-use zenith_utils::lsn::Lsn;
-
-use crate::{repository::Repository, PageServerConf, ZTimelineId};
-
-#[derive(Serialize, Deserialize, Clone)]
-pub struct BranchInfo {
-    pub name: String,
-    pub timeline_id: ZTimelineId,
-    pub latest_valid_lsn: Option<Lsn>,
-    pub ancestor_id: Option<String>,
-    pub ancestor_lsn: Option<String>,
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct PointInTime {
-    pub timelineid: ZTimelineId,
-    pub lsn: Lsn,
-}
-
-pub fn init_repo(conf: &PageServerConf, repo_dir: &Path) -> Result<()> {
-    // top-level dir may exist if we are creating it through CLI
-    fs::create_dir_all(repo_dir)
-        .with_context(|| format!("could not create directory {}", repo_dir.display()))?;
-
-    env::set_current_dir(repo_dir)?;
-
-    fs::create_dir(std::path::Path::new("timelines"))?;
-    fs::create_dir(std::path::Path::new("refs"))?;
-    fs::create_dir(std::path::Path::new("refs").join("branches"))?;
-    fs::create_dir(std::path::Path::new("refs").join("tags"))?;
-    fs::create_dir(std::path::Path::new("wal-redo"))?;
-
-    println!("created directory structure in {}", repo_dir.display());
-
-    // Create initial timeline
-    let tli = create_timeline(conf, None)?;
-    let timelinedir = conf.timeline_path(tli);
-    println!("created initial timeline {}", tli);
-
-    // Run initdb
-    //
-    // We create the cluster temporarily in a "tmp" directory inside the repository,
-    // and move it to the right location from there.
-    let tmppath = std::path::Path::new("tmp");
-
-    print!("running initdb... ");
-    io::stdout().flush()?;
-
-    let initdb_path = conf.pg_bin_dir().join("initdb");
-    let initdb_otput = Command::new(initdb_path)
-        .args(&["-D", tmppath.to_str().unwrap()])
-        .arg("--no-instructions")
-        .env_clear()
-        .env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
-        .env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
-        .stdout(Stdio::null())
-        .output()
-        .with_context(|| "failed to execute initdb")?;
-    if !initdb_otput.status.success() {
-        anyhow::bail!("initdb failed");
-    }
-    println!("initdb succeeded");
-
-    // Read control file to extract the LSN and system id
-    let controlfile_path = tmppath.join("global").join("pg_control");
-    let controlfile = postgres_ffi::decode_pg_control(Bytes::from(fs::read(controlfile_path)?))?;
-    // let systemid = controlfile.system_identifier;
-    let lsn = controlfile.checkPoint;
-    let lsnstr = format!("{:016X}", lsn);
-
-    // Move the initial WAL file
-    fs::rename(
-        tmppath.join("pg_wal").join("000000010000000000000001"),
-        timelinedir
-            .join("wal")
-            .join("000000010000000000000001.partial"),
-    )?;
-    println!("moved initial WAL file");
-
-    // Remove pg_wal
-    fs::remove_dir_all(tmppath.join("pg_wal"))?;
-
-    let target = timelinedir.join("snapshots").join(&lsnstr);
-    fs::rename(tmppath, &target)?;
-
-    // Create 'main' branch to refer to the initial timeline
-    let data = tli.to_string();
-    fs::write(conf.branch_path("main"), data)?;
-    println!("created main branch");
-
-    println!(
-        "new zenith repository was created in {}",
-        repo_dir.display()
-    );
-
-    Ok(())
-}
-
-pub(crate) fn get_branches(
-    conf: &PageServerConf,
-    repository: &dyn Repository,
-) -> Result<Vec<BranchInfo>> {
-    // Each branch has a corresponding record (text file) in the refs/branches
-    // with timeline_id.
-    let branches_dir = std::path::Path::new("refs").join("branches");
-
-    std::fs::read_dir(&branches_dir)?
-        .map(|dir_entry_res| {
-            let dir_entry = dir_entry_res?;
-            let name = dir_entry.file_name().to_str().unwrap().to_string();
-            let timeline_id = std::fs::read_to_string(dir_entry.path())?.parse::<ZTimelineId>()?;
-
-            let latest_valid_lsn = repository
-                .get_timeline(timeline_id)
-                .map(|timeline| timeline.get_last_valid_lsn())
-                .ok();
-
-            let ancestor_path = conf.ancestor_path(timeline_id);
-            let mut ancestor_id: Option<String> = None;
-            let mut ancestor_lsn: Option<String> = None;
-
-            if ancestor_path.exists() {
-                let ancestor = std::fs::read_to_string(ancestor_path)?;
-                let mut strings = ancestor.split('@');
-
-                ancestor_id = Some(
-                    strings
-                        .next()
-                        .with_context(|| "wrong branch ancestor point in time format")?
-                        .to_owned(),
-                );
-                ancestor_lsn = Some(
-                    strings
-                        .next()
-                        .with_context(|| "wrong branch ancestor point in time format")?
-                        .to_owned(),
-                );
-            }
-
-            Ok(BranchInfo {
-                name,
-                timeline_id,
-                latest_valid_lsn,
-                ancestor_id,
-                ancestor_lsn,
-            })
-        })
-        .collect()
-}
-
-pub(crate) fn get_system_id(conf: &PageServerConf) -> Result<u64> {
-    // let branches = get_branches();
-
-    let branches_dir = std::path::Path::new("refs").join("branches");
-    let branches = std::fs::read_dir(&branches_dir)?
-        .map(|dir_entry_res| {
-            let dir_entry = dir_entry_res?;
-            let name = dir_entry.file_name().to_str().unwrap().to_string();
-            let timeline_id = std::fs::read_to_string(dir_entry.path())?.parse::<ZTimelineId>()?;
-            Ok((name, timeline_id))
-        })
-        .collect::<Result<HashMap<String, ZTimelineId>>>()?;
-
-    let main_tli = branches
-        .get("main")
-        .ok_or_else(|| anyhow!("Branch main not found"))?;
-
-    let (_, main_snap_dir) = find_latest_snapshot(conf, *main_tli)?;
-    let controlfile_path = main_snap_dir.join("global").join("pg_control");
-    let controlfile = postgres_ffi::decode_pg_control(Bytes::from(fs::read(controlfile_path)?))?;
-    Ok(controlfile.system_identifier)
-}
-
-pub(crate) fn create_branch(
-    conf: &PageServerConf,
-    branchname: &str,
-    startpoint_str: &str,
-) -> Result<BranchInfo> {
-    if conf.branch_path(&branchname).exists() {
-        anyhow::bail!("branch {} already exists", branchname);
-    }
-
-    let mut startpoint = parse_point_in_time(conf, startpoint_str)?;
-
-    if startpoint.lsn == Lsn(0) {
-        // Find end of WAL on the old timeline
-        let end_of_wal = find_end_of_wal(conf, startpoint.timelineid)?;
-        println!("branching at end of WAL: {}", end_of_wal);
-        startpoint.lsn = end_of_wal;
-    }
-
-    // create a new timeline for it
-    let newtli = create_timeline(conf, Some(startpoint))?;
-    let newtimelinedir = conf.timeline_path(newtli);
-
-    let data = newtli.to_string();
-    fs::write(conf.branch_path(&branchname), data)?;
-
-    // Copy the latest snapshot (TODO: before the startpoint) and all WAL
-    // TODO: be smarter and avoid the copying...
-    let (_maxsnapshot, oldsnapshotdir) = find_latest_snapshot(conf, startpoint.timelineid)?;
-    let copy_opts = fs_extra::dir::CopyOptions::new();
-    fs_extra::dir::copy(oldsnapshotdir, newtimelinedir.join("snapshots"), &copy_opts)?;
-
-    let oldtimelinedir = conf.timeline_path(startpoint.timelineid);
-    copy_wal(
-        &oldtimelinedir.join("wal"),
-        &newtimelinedir.join("wal"),
-        startpoint.lsn,
-        pg_constants::WAL_SEGMENT_SIZE,
-    )?;
-
-    Ok(BranchInfo {
-        name: branchname.to_string(),
-        timeline_id: newtli,
-        latest_valid_lsn: Some(startpoint.lsn),
-        ancestor_id: None,
-        ancestor_lsn: None,
-    })
-}
-
-//
-// Parse user-given string that represents a point-in-time.
-//
-// We support multiple variants:
-//
-// Raw timeline id in hex, meaning the end of that timeline:
-//    bc62e7d612d0e6fe8f99a6dd2f281f9d
-//
-// A specific LSN on a timeline:
-//    bc62e7d612d0e6fe8f99a6dd2f281f9d@2/15D3DD8
-//
-// Same, with a human-friendly branch name:
-//    main
-//    main@2/15D3DD8
-//
-// Human-friendly tag name:
-//    mytag
-//
-//
-fn parse_point_in_time(conf: &PageServerConf, s: &str) -> Result<PointInTime> {
-    let mut strings = s.split('@');
-    let name = strings.next().unwrap();
-
-    let lsn: Option<Lsn>;
-    if let Some(lsnstr) = strings.next() {
-        lsn = Some(
-            Lsn::from_str(lsnstr).with_context(|| "invalid LSN in point-in-time specification")?,
-        );
-    } else {
-        lsn = None
-    }
-
-    // Check if it's a tag
-    if lsn.is_none() {
-        let tagpath = conf.tag_path(name);
-        if tagpath.exists() {
-            let pointstr = fs::read_to_string(tagpath)?;
-
-            return parse_point_in_time(conf, &pointstr);
-        }
-    }
-
-    // Check if it's a branch
-    // Check if it's branch @ LSN
-    let branchpath = conf.branch_path(name);
-    if branchpath.exists() {
-        let pointstr = fs::read_to_string(branchpath)?;
-
-        let mut result = parse_point_in_time(conf, &pointstr)?;
-
-        result.lsn = lsn.unwrap_or(Lsn(0));
-        return Ok(result);
-    }
-
-    // Check if it's a timelineid
-    // Check if it's timelineid @ LSN
-    if let Ok(timelineid) = ZTimelineId::from_str(name) {
-        let tlipath = conf.timeline_path(timelineid);
-        if tlipath.exists() {
-            return Ok(PointInTime {
-                timelineid,
-                lsn: lsn.unwrap_or(Lsn(0)),
-            });
-        }
-    }
-
-    bail!("could not parse point-in-time {}", s);
-}
-
-fn create_timeline(conf: &PageServerConf, ancestor: Option<PointInTime>) -> Result<ZTimelineId> {
-    // Create initial timeline
-    let mut tli_buf = [0u8; 16];
-    rand::thread_rng().fill(&mut tli_buf);
-    let timelineid = ZTimelineId::from(tli_buf);
-
-    let timelinedir = conf.timeline_path(timelineid);
-
-    fs::create_dir(&timelinedir)?;
-    fs::create_dir(&timelinedir.join("snapshots"))?;
-    fs::create_dir(&timelinedir.join("wal"))?;
-
-    if let Some(ancestor) = ancestor {
-        let data = format!("{}@{}", ancestor.timelineid, ancestor.lsn);
-        fs::write(timelinedir.join("ancestor"), data)?;
-    }
-
-    Ok(timelineid)
-}
-
-///
-/// Copy all WAL segments from one directory to another, up to given LSN.
-///
-/// If the given LSN is in the middle of a segment, the last segment containing it
-/// is written out as .partial, and padded with zeros.
-///
-fn copy_wal(src_dir: &Path, dst_dir: &Path, upto: Lsn, wal_seg_size: usize) -> Result<()> {
-    let last_segno = upto.segment_number(wal_seg_size);
-    let last_segoff = upto.segment_offset(wal_seg_size);
-
-    for entry in fs::read_dir(src_dir).unwrap() {
-        if let Ok(entry) = entry {
-            let entry_name = entry.file_name();
-            let fname = entry_name.to_str().unwrap();
-
-            // Check if the filename looks like an xlog file, or a .partial file.
-            if !xlog_utils::IsXLogFileName(fname) && !xlog_utils::IsPartialXLogFileName(fname) {
-                continue;
-            }
-            let (segno, _tli) = xlog_utils::XLogFromFileName(fname, wal_seg_size as usize);
-
-            let copylen;
-            let mut dst_fname = PathBuf::from(fname);
-            if segno > last_segno {
-                // future segment, skip
-                continue;
-            } else if segno < last_segno {
-                copylen = wal_seg_size;
-                dst_fname.set_extension("");
-            } else {
-                copylen = last_segoff;
-                dst_fname.set_extension("partial");
-            }
-
-            let src_file = File::open(entry.path())?;
-            let mut dst_file = File::create(dst_dir.join(&dst_fname))?;
-            std::io::copy(&mut src_file.take(copylen as u64), &mut dst_file)?;
-
-            if copylen < wal_seg_size {
-                std::io::copy(
-                    &mut std::io::repeat(0).take((wal_seg_size - copylen) as u64),
-                    &mut dst_file,
-                )?;
-            }
-        }
-    }
-    Ok(())
-}
-
-// Find the end of valid WAL in a wal directory
-pub fn find_end_of_wal(conf: &PageServerConf, timeline: ZTimelineId) -> Result<Lsn> {
-    let waldir = conf.timeline_path(timeline).join("wal");
-    let (lsn, _tli) = xlog_utils::find_end_of_wal(&waldir, pg_constants::WAL_SEGMENT_SIZE, true);
-    Ok(Lsn(lsn))
-}
-
-// Find the latest snapshot for a timeline
-fn find_latest_snapshot(conf: &PageServerConf, timeline: ZTimelineId) -> Result<(Lsn, PathBuf)> {
-    let snapshotsdir = conf.snapshots_path(timeline);
-    let paths = fs::read_dir(&snapshotsdir)?;
-    let mut maxsnapshot = Lsn(0);
-    let mut snapshotdir: Option<PathBuf> = None;
-    for path in paths {
-        let path = path?;
-        let filename = path.file_name().to_str().unwrap().to_owned();
-        if let Ok(lsn) = Lsn::from_hex(&filename) {
-            maxsnapshot = std::cmp::max(lsn, maxsnapshot);
-            snapshotdir = Some(path.path());
-        }
-    }
-    if maxsnapshot == Lsn(0) {
-        // TODO: check ancestor timeline
-        anyhow::bail!("no snapshot found in {}", snapshotsdir.display());
-    }
-
-    Ok((maxsnapshot, snapshotdir.unwrap()))
-}
--- a/pageserver/src/controlfile.rs
+++ b/pageserver/src/controlfile.rs
@@ -0,0 +1,218 @@
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::SeekFrom;
+
+use bytes::{Buf, Bytes};
+
+use log::*;
+
+type XLogRecPtr = u64;
+
+#[repr(C)]
+#[derive(Debug, Clone)]
+/*
+ * Body of CheckPoint XLOG records.  PostgreSQL keeps a copy of the latest one
+ * in pg_control for possible disaster recovery; decode_pg_control() fills this
+ * struct from that copy field by field, skipping alignment padding by hand.
+ * On the PostgreSQL side, changing the struct requires a PG_CONTROL_VERSION
+ * bump, and the decoder here has to be kept in step with it.
+ */
+pub struct CheckPoint {
+    pub redo: XLogRecPtr,     /* next RecPtr available when we began to
+                               * create CheckPoint (i.e. REDO start point) */
+    pub ThisTimeLineID: u32,  /* current TLI */
+    pub PrevTimeLineID: u32,  /* previous TLI, if this record begins a new
+                               * timeline (equals ThisTimeLineID otherwise) */
+    pub fullPageWrites: bool, /* current full_page_writes */
+    pub nextXid: u64,         /* next free transaction ID */
+    pub nextOid: u32,         /* next free OID */
+    pub nextMulti: u32,       /* next free MultiXactId */
+    pub nextMultiOffset: u32, /* next free MultiXact offset */
+    pub oldestXid: u32,       /* cluster-wide minimum datfrozenxid */
+    pub oldestXidDB: u32,     /* database with minimum datfrozenxid */
+    pub oldestMulti: u32,     /* cluster-wide minimum datminmxid */
+    pub oldestMultiDB: u32,   /* database with minimum datminmxid */
+    pub time: u64,            /* time stamp of checkpoint */
+    pub oldestCommitTsXid: u32, /* oldest Xid with valid commit timestamp */
+    pub newestCommitTsXid: u32, /* newest Xid with valid commit timestamp */
+
+    /*
+     * Oldest XID still running. This is only needed to initialize hot standby
+     * mode from an online checkpoint, so we only bother calculating this for
+     * online checkpoints and only when wal_level is replica. Otherwise it's
+     * set to InvalidTransactionId.
+     */
+    pub oldestActiveXid: u32,
+}
+
+// Leading fields of a pg_control image, in the order decode_pg_control() reads them.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct ControlFileDataZenith {
+    pub system_identifier: u64,
+    pg_control_version: u32, /* PG_CONTROL_VERSION */
+    catalog_version_no: u32, /* see catversion.h */
+    state: i32, /* database state code; NOTE(review): no enum is declared in this file */
+    time: i64,  /* time stamp of last pg_control update */
+    pub checkPoint: XLogRecPtr,
+    checkPointCopy: CheckPoint, /* copy of last check point record */
+    unloggedLSN: XLogRecPtr,    /* current fake LSN value, for unlogged rels */
+    minRecoveryPoint: XLogRecPtr,
+    minRecoveryPointTLI: u32,
+    backupStartPoint: XLogRecPtr,
+    backupEndPoint: XLogRecPtr,
+    backupEndRequired: bool,
+}
+
+impl ControlFileDataZenith {
+    // Returns a placeholder control file: every number is 0 and every flag is false.
+    pub fn new() -> ControlFileDataZenith {
+        let zero_checkpoint = CheckPoint {
+            redo: 0,
+            ThisTimeLineID: 0,
+            PrevTimeLineID: 0,
+            fullPageWrites: false,
+            nextXid: 0,
+            nextOid: 0,
+            nextMulti: 0,
+            nextMultiOffset: 0,
+            oldestXid: 0,
+            oldestXidDB: 0,
+            oldestMulti: 0,
+            oldestMultiDB: 0,
+            time: 0,
+            oldestCommitTsXid: 0,
+            newestCommitTsXid: 0,
+            oldestActiveXid: 0,
+        };
+        ControlFileDataZenith {
+            system_identifier: 0,
+            pg_control_version: 0,
+            catalog_version_no: 0,
+            state: 0,
+            time: 0,
+            checkPoint: 0,
+            checkPointCopy: zero_checkpoint,
+            unloggedLSN: 0,
+            minRecoveryPoint: 0,
+            minRecoveryPointTLI: 0,
+            backupStartPoint: 0,
+            backupEndPoint: 0,
+            backupEndRequired: false,
+        }
+    }
+}
+
+// Decode the leading fields of a little-endian pg_control image, skipping C alignment padding.
+pub fn decode_pg_control(mut buf: Bytes) -> ControlFileDataZenith {
+    info!("decode pg_control");
+    // Fields are read in the order written below; the `bytes` getters panic if `buf` runs out.
+    ControlFileDataZenith {
+        system_identifier: buf.get_u64_le(),
+        pg_control_version: buf.get_u32_le(),
+        catalog_version_no: buf.get_u32_le(),
+        state: buf.get_i32_le(),
+        time: {
+            buf.advance(4);
+            buf.get_i64_le()
+        },
+        checkPoint: buf.get_u64_le(),
+        checkPointCopy: CheckPoint {
+            redo: buf.get_u64_le(),
+            ThisTimeLineID: buf.get_u32_le(),
+            PrevTimeLineID: buf.get_u32_le(),
+            fullPageWrites: buf.get_u8() != 0,
+            nextXid: {
+                buf.advance(7);
+                buf.get_u64_le()
+            },
+            nextOid: buf.get_u32_le(),
+            nextMulti: buf.get_u32_le(),
+            nextMultiOffset: buf.get_u32_le(),
+            oldestXid: buf.get_u32_le(),
+            oldestXidDB: buf.get_u32_le(),
+            oldestMulti: buf.get_u32_le(),
+            oldestMultiDB: buf.get_u32_le(),
+            time: {
+                buf.advance(4);
+                buf.get_u64_le()
+            },
+            oldestCommitTsXid: buf.get_u32_le(),
+            newestCommitTsXid: buf.get_u32_le(),
+            oldestActiveXid: buf.get_u32_le(),
+        },
+        unloggedLSN: {
+            buf.advance(4); // tail padding of the 8-byte-aligned C CheckPoint (84 -> 88 bytes)
+            buf.get_u64_le()
+        },
+        minRecoveryPoint: buf.get_u64_le(),
+        minRecoveryPointTLI: buf.get_u32_le(),
+        backupStartPoint: {
+            buf.advance(4);
+            buf.get_u64_le()
+        },
+        backupEndPoint: buf.get_u64_le(),
+        backupEndRequired: buf.get_u8() != 0,
+    }
+}
+
+// Decode a pg_control image and log its checkpoint LSN (as hi/lo hex) plus all decoded fields.
+pub fn parse_controlfile(b: Bytes) {
+    let controlfile = decode_pg_control(b);
+    info!(
+        "controlfile {:X}/{:X}",
+        controlfile.checkPoint >> 32,
+        controlfile.checkPoint & 0xFFFF_FFFF // low 32 bits only, so the high half is not repeated
+    );
+    info!("controlfile {:?}", controlfile);
+}
+
+const MAX_MAPPINGS: usize = 62; // number of mapping slots in a RelMapFile
+// One mapping slot: two u32s, i.e. the 8 bytes per slot assumed by RelMapFile::mappings.
+#[derive(Debug)]
+struct RelMapping {
+    mapoid: u32,      /* OID of a catalog */
+    mapfilenode: u32, /* its filenode number */
+}
+
+#[derive(Debug)]
+pub struct RelMapFile {
+    magic: i32,        /* always RELMAPPER_FILEMAGIC (not checked by decode_filemapping) */
+    num_mappings: i32, /* number of valid RelMapping entries */
+    mappings: [u8; MAX_MAPPINGS * 8], /* all slots as raw, undecoded bytes (8 per slot) */
+    crc: u32, /* CRC of all above (not verified by decode_filemapping) */
+    pad: i32, /* to make the struct size be 512 exactly */
+}
+
+// Decode a relation map file image: magic, entry count, raw mapping bytes, CRC, pad.
+// Neither the magic nor the CRC is validated; the `bytes` getters panic on short input.
+pub fn decode_filemapping(mut buf: Bytes) -> RelMapFile {
+    info!("decode filemap");
+
+    // Consume the fields front to back; all integers are little-endian.
+    let magic = buf.get_i32_le();
+    let num_mappings = buf.get_i32_le();
+    // The mapping slots are copied out as raw bytes, not decoded into RelMapping.
+    let mut mappings = [0u8; MAX_MAPPINGS * 8];
+    buf.copy_to_slice(&mut mappings);
+    let crc = buf.get_u32_le();
+    let pad = buf.get_i32_le();
+
+    let relmap = RelMapFile { magic, num_mappings, mappings, crc, pad };
+    info!("decode filemap {:?}", relmap);
+    relmap
+}
+
+// Write `buf` at block `blkno` (8192-byte blocks) of `filepath`; panics on any I/O error.
+pub fn write_buf_to_file(filepath: String, buf: Bytes, blkno: u32) {
+    info!("write_buf_to_file {}", filepath);
+    // Open without truncating: File::create() would wipe blocks written by earlier calls.
+    let mut opts = std::fs::OpenOptions::new();
+    let mut file: File = opts.write(true).create(true).open(&filepath).unwrap();
+    file.seek(SeekFrom::Start(8192 * blkno as u64)).unwrap();
+    file.write_all(&buf).unwrap();
+    info!("DONE write_buf_to_file {}", filepath);
+}
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -1,17 +1,12 @@
-use serde::{Deserialize, Serialize};
-
-use std::fmt;
 use std::net::SocketAddr;
 use std::path::PathBuf;
-use std::str::FromStr;
-use std::time::Duration;

-pub mod basebackup;
-pub mod branches;
+pub mod controlfile;
 pub mod page_cache;
 pub mod page_service;
-pub mod repository;
-pub mod restore_local_repo;
+#[allow(dead_code)]
+pub mod pg_constants;
+pub mod restore_s3;
 pub mod tui;
 pub mod tui_event;
 mod tui_logger;
@@ -19,120 +14,13 @@ pub mod waldecoder;
 pub mod walreceiver;
 pub mod walredo;

+#[allow(dead_code)]
 #[derive(Debug, Clone)]
 pub struct PageServerConf {
+    pub data_dir: PathBuf,
    pub daemonize: bool,
    pub interactive: bool,
+    pub wal_producer_connstr: Option<String>,
    pub listen_addr: SocketAddr,
-    pub gc_horizon: u64,
-    pub gc_period: Duration,
-
-    // Repository directory, relative to current working directory.
-    // Normally, the page server changes the current working directory
-    // to the repository, and 'workdir' is always '.'. But we don't do
-    // that during unit testing, because the current directory is global
-    // to the process but different unit tests work on different
-    // repositories.
-    pub workdir: PathBuf,
-
-    pub pg_distrib_dir: PathBuf,
-}
-
-impl PageServerConf {
-    //
-    // Repository paths, relative to workdir.
-    //
-
-    fn tag_path(&self, name: &str) -> PathBuf {
-        self.workdir.join("refs").join("tags").join(name)
-    }
-
-    fn branch_path(&self, name: &str) -> PathBuf {
-        self.workdir.join("refs").join("branches").join(name)
-    }
-
-    fn timeline_path(&self, timelineid: ZTimelineId) -> PathBuf {
-        self.workdir.join("timelines").join(timelineid.to_string())
-    }
-
-    fn snapshots_path(&self, timelineid: ZTimelineId) -> PathBuf {
-        self.timeline_path(timelineid).join("snapshots")
-    }
-
-    fn ancestor_path(&self, timelineid: ZTimelineId) -> PathBuf {
-        self.timeline_path(timelineid).join("ancestor")
-    }
-
-    //
-    // Postgres distribution paths
-    //
-
-    pub fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
-    }
-
-    pub fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
-    }
-}
-
-/// Zenith Timeline ID is a 128-bit random ID.
-///
-/// Zenith timeline IDs are different from PostgreSQL timeline
-/// IDs. They serve a similar purpose though: they differentiate
-/// between different "histories" of the same cluster.  However,
-/// PostgreSQL timeline IDs are a bit cumbersome, because they are only
-/// 32-bits wide, and they must be in ascending order in any given
-/// timeline history.  Those limitations mean that we cannot generate a
-/// new PostgreSQL timeline ID by just generating a random number. And
-/// that in turn is problematic for the "pull/push" workflow, where you
-/// have a local copy of a zenith repository, and you periodically sync
-/// the local changes with a remote server. When you work "detached"
-/// from the remote server, you cannot create a PostgreSQL timeline ID
-/// that's guaranteed to be different from all existing timelines in
-/// the remote server. For example, if two people are having a clone of
-/// the repository on their laptops, and they both create a new branch
-/// with different name. What timeline ID would they assign to their
-/// branches? If they pick the same one, and later try to push the
-/// branches to the same remote server, they will get mixed up.
-///
-/// To avoid those issues, Zenith has its own concept of timelines that
-/// is separate from PostgreSQL timelines, and doesn't have those
-/// limitations. A zenith timeline is identified by a 128-bit ID, which
-/// is usually printed out as a hex string.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
-pub struct ZTimelineId([u8; 16]);
-
-impl FromStr for ZTimelineId {
-    type Err = hex::FromHexError;
-
-    fn from_str(s: &str) -> Result<ZTimelineId, Self::Err> {
-        let timelineid = hex::decode(s)?;
-
-        let mut buf: [u8; 16] = [0u8; 16];
-        buf.copy_from_slice(timelineid.as_slice());
-        Ok(ZTimelineId(buf))
-    }
-}
-
-impl ZTimelineId {
-    pub fn from(b: [u8; 16]) -> ZTimelineId {
-        ZTimelineId(b)
-    }
-
-    pub fn get_from_buf(buf: &mut dyn bytes::Buf) -> ZTimelineId {
-        let mut arr = [0u8; 16];
-        buf.copy_to_slice(&mut arr);
-        ZTimelineId::from(arr)
-    }
-
-    pub fn as_arr(&self) -> [u8; 16] {
-        self.0
-    }
-}
-
-impl fmt::Display for ZTimelineId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str(&hex::encode(self.0))
-    }
+    pub skip_recovery: bool,
 }
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -1,32 +1,793 @@
-//! This module acts as a switchboard to access different repositories managed by this
-//! page server. Currently, a Page Server can only manage one repository, so there
-//! isn't much here. If we implement multi-tenancy, this will probably be changed into
-//! a hash map, keyed by the tenant ID.
+//
+// Page Cache holds all the different page versions and WAL records
+//
+// The Page Cache is a BTreeMap, keyed by the RelFileNode and block number, and the LSN.
+// The BTreeMap is protected by a Mutex, and each cache entry is protected by another
+// per-entry mutex.
+//

-use crate::repository::rocksdb::RocksRepository;
-use crate::repository::Repository;
-use crate::walredo::PostgresRedoManager;
-use crate::PageServerConf;
+use core::ops::Bound::Included;
+use std::collections::{BTreeMap, HashMap};
+use std::{convert::TryInto, ops::AddAssign};
+
+use std::error::Error;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering;
+use std::sync::{Arc, Condvar, Mutex};
+use std::thread;
+use std::time::Duration;
+// use tokio::sync::RwLock;
+use bytes::Bytes;
 use lazy_static::lazy_static;
-use std::sync::{Arc, Mutex};
+use log::*;
+use rand::Rng;
+
+use crate::{controlfile, walredo, PageServerConf};
+
+use crossbeam_channel::unbounded;
+use crossbeam_channel::{Receiver, Sender};
+
+// Timeout when waiting for the WAL receiver to catch up to an LSN given in a GetPage@LSN call.
+static TIMEOUT: Duration = Duration::from_secs(60);
+
+pub struct PageCache {
+    // The page map and the LSN bookkeeping, guarded by a single mutex.
+    shared: Mutex<PageCacheShared>,
+    // Channel for handing cache entries that need WAL replay to the WAL redo thread.
+    pub walredo_sender: Sender<Arc<CacheEntry>>,
+    pub walredo_receiver: Receiver<Arc<CacheEntry>>,
+    // Waited on (with `shared` locked) by get_page_at_lsn until enough WAL has arrived.
+    valid_lsn_condvar: Condvar,
+
+    // Counters, for metrics collection.
+    pub num_entries: AtomicU64,
+    pub num_page_images: AtomicU64,
+    pub num_wal_records: AtomicU64,
+    pub num_getpage_requests: AtomicU64,
+
+    // copies of shared.first/last_valid_lsn fields (copied here so
+    // that they can be read without acquiring the mutex).
+    pub first_valid_lsn: AtomicU64,
+    pub last_valid_lsn: AtomicU64,
+    pub last_record_lsn: AtomicU64,
+}
+
+// Plain-integer counterparts of the counters and LSN fields kept in PageCache.
+#[derive(Clone)]
+pub struct PageCacheStats {
+    pub num_entries: u64,
+    pub num_page_images: u64,
+    pub num_wal_records: u64,
+    pub num_getpage_requests: u64,
+    pub first_valid_lsn: u64,
+    pub last_valid_lsn: u64,
+    pub last_record_lsn: u64,
+}
+
+impl AddAssign for PageCacheStats {
+    // Field-wise sum of two stats values. Note that the three LSN fields are summed too.
+    fn add_assign(&mut self, other: Self) {
+        self.num_entries += other.num_entries;
+        self.num_page_images += other.num_page_images;
+        self.num_wal_records += other.num_wal_records;
+        self.num_getpage_requests += other.num_getpage_requests;
+        self.first_valid_lsn += other.first_valid_lsn;
+        self.last_valid_lsn += other.last_valid_lsn;
+        self.last_record_lsn += other.last_record_lsn;
+    }
+}
+
+//
+// Shared data structure, holding page cache and related auxiliary information
+//
+struct PageCacheShared {
+    // The actual page cache: every stored page version, ordered by (tag, lsn)
+    pagecache: BTreeMap<CacheKey, Arc<CacheEntry>>,
+
+    // Relation n_blocks cache (put_wal_record grows an entry to blknum + 1)
+    //
+    // This hashtable should be updated together with the pagecache. Now it is
+    // accessed unreasonably often through the smgr_nblocks(). It is better to just
+    // cache it in postgres smgr and ask only on restart.
+    relsize_cache: HashMap<RelTag, u32>,
+
+    // What page versions do we hold in the cache? If we get GetPage with
+    // LSN < first_valid_lsn, that's an error because we (no longer) hold that
+    // page version. If we get a request > last_valid_lsn, we need to wait until
+    // we receive all the WAL up to the request.
+    //
+    // last_record_lsn points to the end of last processed WAL record.
+    // It can lag behind last_valid_lsn, if the WAL receiver has received some WAL
+    // after the end of last record, but not the whole next record yet. In the
+    // page cache, we care about last_valid_lsn, but if the WAL receiver needs to
+    // restart the streaming, it needs to restart at the end of last record, so
+    // we track them separately. last_record_lsn should perhaps be in
+    // walreceiver.rs instead of here, but it seems convenient to keep all three
+    // values together.
+    first_valid_lsn: u64,
+    last_valid_lsn: u64,
+    last_record_lsn: u64,
+
+    // Decoded pg_control fields; starts out all-zero (see init_page_cache).
+    controldata: controlfile::ControlFileDataZenith,
+}

 lazy_static! {
-    pub static ref REPOSITORY: Mutex<Option<Arc<dyn Repository + Send + Sync>>> = Mutex::new(None);
+    pub static ref PAGECACHES: Mutex<HashMap<u64, Arc<PageCache>>> = Mutex::new(HashMap::new());
 }

-pub fn init(conf: &'static PageServerConf) {
-    let mut m = REPOSITORY.lock().unwrap();
+pub fn get_pagecache(conf: PageServerConf, sys_id: u64) -> Arc<PageCache> {
+    let mut pcaches = PAGECACHES.lock().unwrap();

-    // Set up a WAL redo manager, for applying WAL records.
-    let walredo_mgr = PostgresRedoManager::new(conf);
+    if !pcaches.contains_key(&sys_id) {
+        pcaches.insert(sys_id, Arc::new(init_page_cache()));

-    // we have already changed current dir to the repository.
-    let repo = RocksRepository::new(conf, Arc::new(walredo_mgr));
+        // Initialize the WAL redo thread
+        //
+        // The join handle is not saved anywhere, and we won't try to restart the thread
+        // if it dies. Later we may stop such threads after some period of inactivity
+        // and restart them on demand.
+        let _walredo_thread = thread::Builder::new()
+            .name("WAL redo thread".into())
+            .spawn(move || {
+                walredo::wal_redo_main(conf, sys_id);
+            })
+            .unwrap();
+    }

-    *m = Some(Arc::new(repo));
+    pcaches.get(&sys_id).unwrap().clone()
 }

-pub fn get_repository() -> Arc<dyn Repository + Send + Sync> {
-    let o = &REPOSITORY.lock().unwrap();
-    Arc::clone(o.as_ref().unwrap())
+// Builds an empty PageCache: no entries, all LSNs and counters zero.
+fn init_page_cache() -> PageCache {
+    // Unbounded channel between the page cache and the WAL applicator.
+    let (walredo_sender, walredo_receiver) = unbounded();
+
+    let shared = PageCacheShared {
+        pagecache: BTreeMap::new(),
+        relsize_cache: HashMap::new(),
+        first_valid_lsn: 0,
+        last_valid_lsn: 0,
+        last_record_lsn: 0,
+        controldata: controlfile::ControlFileDataZenith::new(),
+    };
+
+    PageCache {
+        shared: Mutex::new(shared),
+        valid_lsn_condvar: Condvar::new(),
+        walredo_sender,
+        walredo_receiver,
+        num_entries: AtomicU64::new(0),
+        num_page_images: AtomicU64::new(0),
+        num_wal_records: AtomicU64::new(0),
+        num_getpage_requests: AtomicU64::new(0),
+        first_valid_lsn: AtomicU64::new(0),
+        last_valid_lsn: AtomicU64::new(0),
+        last_record_lsn: AtomicU64::new(0),
+    }
+}
+
+//
+// We store two kinds of entries in the page cache:
+//
+// 1. Ready-made images of the block
+// 2. WAL records, to be applied on top of the "previous" entry
+//
+// Some WAL records will initialize the page from scratch. For such records,
+// the 'will_init' flag is set. They don't need the previous page image before
+// applying. The 'will_init' flag is set for records containing a full-page image,
+// and for records with the BKPBLOCK_WILL_INIT flag. These differ from PageImages
+// stored directly in the cache entry in that you still need to run the WAL redo
+// routine to generate the page image.
+//
+#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
+pub struct CacheKey {
+    pub tag: BufferTag, // which block; compared first by the derived ordering
+    pub lsn: u64,       // version of that block; lookups range-scan one tag from lsn 0 upwards
+}
+
+pub struct CacheEntry {
+    pub key: CacheKey,
+    // Page image and/or WAL record for this version, under its own per-entry lock.
+    pub content: Mutex<CacheEntryContent>,
+
+    // Condition variable used by the WAL redo service, to wake up
+    // requester.
+    //
+    // FIXME: this takes quite a lot of space. Consider using parking_lot::Condvar
+    // or something else.
+    pub walredo_condvar: Condvar,
+}
+
+pub struct CacheEntryContent {
+    pub page_image: Option<Bytes>,     // ready-made image of the block, if there is one
+    pub wal_record: Option<WALRecord>, // WAL record to apply on top of the previous entry, if any
+    pub apply_pending: bool, // set by get_page_at_lsn before queuing a redo; waiters loop until cleared
+}
+
+impl CacheEntry {
+    // Creates an entry for `key` with no page image, no WAL record and no redo pending.
+    fn new(key: CacheKey) -> CacheEntry {
+        let content = CacheEntryContent {
+            page_image: None,
+            wal_record: None,
+            apply_pending: false,
+        };
+        let (content, walredo_condvar) = (Mutex::new(content), Condvar::new());
+
+        CacheEntry { key, content, walredo_condvar }
+    }
+}
+
+#[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)] // Hash + Eq: key of the relsize_cache HashMap
+pub struct RelTag {
+    pub spcnode: u32,
+    pub dbnode: u32,
+    pub relnode: u32,
+    pub forknum: u8, // same four fields as BufferTag, minus the block number
+}
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)] // Ord: part of the BTreeMap key
+pub struct BufferTag {
+    pub spcnode: u32,
+    pub dbnode: u32,
+    pub relnode: u32,
+    pub forknum: u8, // get_page_at_lsn treats forknum > 40 as a non-relation page
+    pub blknum: u32, // put_wal_record grows the cached relation size to blknum + 1
+}
+
+#[derive(Clone)]
+pub struct WALRecord {
+    pub lsn: u64, // LSN at the *end* of the record
+    pub will_init: bool, // record rebuilds the page from scratch; no older entries are needed
+    pub rec: Bytes, // record payload; presumably the raw WAL record bytes — TODO confirm
+}
+
+// Public interface functions
+
+impl PageCache {
+    // Returns the newest cached image of a non-relation page; errors are returned as strings.
+    pub fn get_nonrel_page(&self, tag: BufferTag, _reqlsn: u64) -> Result<Bytes, Box<dyn Error>> {
+        self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
+        // Non-rel pages are not versioned yet, and at bootstrap the LSN of some
+        // files is unknown, so `_reqlsn` is ignored and the very latest version
+        // is always returned (the search below is bounded by u64::MAX).
+        // let lsn = reqlsn;
+
+        let lsn = u64::MAX;
+
+        let minkey = CacheKey { tag: tag, lsn: 0 };
+        // Look up to the largest lsn
+        let maxkey = CacheKey { tag: tag, lsn: lsn };
+
+        let entry_rc: Arc<CacheEntry>;
+        {
+            let shared = self.shared.lock().unwrap();
+
+            let pagecache = &shared.pagecache;
+            info!("got pagecache {}", pagecache.len());
+
+            let mut entries = pagecache.range((Included(&minkey), Included(&maxkey)));
+
+            let entry_opt = entries.next_back();
+
+            if entry_opt.is_none() {
+                return Err(format!(
+                    "not found non-rel page with LSN {} for {}/{}/{}.{} blk {}",
+                    lsn, tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum
+                ))?;
+            }
+            // NOTE: `lsn` logged here is the u64::MAX search bound, not the LSN of the entry found.
+            info!(
+                "found non-rel page with LSN {} for {}/{}/{}.{} blk {}",
+                lsn, tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum
+            );
+
+            let (_key, entry) = entry_opt.unwrap();
+            entry_rc = entry.clone();
+
+            // Now that we have a reference to the cache entry, drop the lock on the map.
+            // It's important to do this before waiting on the condition variable below,
+            // and better to do it as soon as possible to maximize concurrency.
+        }
+
+        // Lock the cache entry and dig the page image out of it.
+        let page_img: Bytes;
+        {
+            let entry_content = entry_rc.content.lock().unwrap();
+
+            if let Some(img) = &entry_content.page_image {
+                assert!(!entry_content.apply_pending);
+                page_img = img.clone();
+            } else if entry_content.wal_record.is_some() {
+                return Err("non-rel WAL redo is not implemented yet".into());
+                //
+                // If this page needs to be reconstructed by applying some WAL,
+                // send a request to the WAL redo thread.
+                //
+                // if !entry_content.apply_pending {
+                //     assert!(!entry_content.apply_pending);
+                //     entry_content.apply_pending = true;
+
+                //     let s = &self.walredo_sender;
+                //     s.send(entry_rc.clone())?;
+                // }
+
+                // while entry_content.apply_pending {
+                //     entry_content = entry_rc.walredo_condvar.wait(entry_content).unwrap();
+                //}
+
+                // We should now have a page image. If we don't, it means that WAL redo
+                // failed to reconstruct it. WAL redo should've logged that error already.
+                // page_img = match &entry_content.page_image {
+                //     Some(p) => p.clone(),
+                //     None => {
+                //         error!("could not apply WAL to reconstruct page image for GetPage@LSN request");
+                //         return Err("could not apply WAL to reconstruct page image".into());
+                //     }
+                // };
+            } else {
+                // No base image, and no WAL record. Huh?
+                return Err(format!("no page image or WAL record for requested page"))?;
+            }
+        }
+
+        trace!(
+            "Returning page for {}/{}/{}.{} blk {}",
+            tag.spcnode,
+            tag.dbnode,
+            tag.relnode,
+            tag.forknum,
+            tag.blknum
+        );
+
+        return Ok(page_img);
+    }
+
+    //
+    // GetPage@LSN: returns the 8k image of block `tag` as of `reqlsn`. Waits (each
+    // wait bounded by TIMEOUT) for WAL to arrive, and asks the WAL redo thread to
+    // rebuild the page when the newest matching entry is a WAL record.
+    //
+    pub fn get_page_at_lsn(&self, tag: BufferTag, reqlsn: u64) -> Result<Bytes, Box<dyn Error>> {
+        let mut lsn = reqlsn;
+        // Fork numbers above 40 are routed to get_nonrel_page (which ignores the LSN).
+        if tag.forknum > 40 {
+            info!(
+                "get_page_at_lsn got request for page with LSN {} for {}/{}/{}.{} blk {}",
+                lsn, tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum
+            );
+
+            return self.get_nonrel_page(tag, lsn);
+        }
+        // A request LSN of 0 falls back to the checkpoint LSN from the control data.
+        if reqlsn == 0 {
+            let c = self.get_controldata();
+            lsn = c.checkPoint;
+
+            info!("update reqlsn get_page_at_lsn got request for page with LSN {} for {}/{}/{}.{} blk {}", lsn,
+        tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum);
+        }
+
+        self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
+
+        // Look up cache entry. If it's a page image, return that. If it's a WAL record,
+        // ask the WAL redo service to reconstruct the page image from the WAL records.
+        let minkey = CacheKey { tag: tag, lsn: 0 };
+        let maxkey = CacheKey { tag: tag, lsn: lsn };
+        let entry_rc: Arc<CacheEntry>;
+        {
+            let mut shared = self.shared.lock().unwrap();
+
+            let mut waited = false;
+
+            // When server just started and created checkpoint lsn,
+            // but we have not yet established connection,
+            // requested lsn will be larger than the one we have
+            while lsn > shared.last_valid_lsn + 500 {
+                // TODO: Wait for the WAL receiver to catch up
+                waited = true;
+                trace!(
+                    "not caught up yet: {}, requested {}",
+                    shared.last_valid_lsn,
+                    lsn
+                );
+                let wait_result = self
+                    .valid_lsn_condvar
+                    .wait_timeout(shared, TIMEOUT)
+                    .unwrap();
+
+                shared = wait_result.0;
+                if wait_result.1.timed_out() {
+                    return Err(format!(
+                        "Timed out while waiting for WAL record at LSN {} to arrive",
+                        lsn
+                    ))?;
+                }
+            }
+            if waited {
+                trace!("caught up now, continuing");
+            }
+
+            if lsn < shared.first_valid_lsn {
+                return Err(format!("LSN {} has already been removed", lsn))?;
+            }
+
+            let pagecache = &shared.pagecache;
+
+            let mut entries = pagecache.range((Included(&minkey), Included(&maxkey)));
+
+            let entry_opt = entries.next_back();
+
+            if entry_opt.is_none() {
+                //static ZERO_PAGE:[u8; 8192] = [0 as u8; 8192];
+                //return Ok(Bytes::from_static(&ZERO_PAGE));
+                return Err("could not find page image")?;
+            }
+            let (_key, entry) = entry_opt.unwrap();
+            entry_rc = entry.clone();
+
+            // Now that we have a reference to the cache entry, drop the lock on the map.
+            // It's important to do this before waiting on the condition variable below,
+            // and better to do it as soon as possible to maximize concurrency.
+        }
+
+        // Lock the cache entry and dig the page image out of it.
+        let page_img: Bytes;
+        {
+            let mut entry_content = entry_rc.content.lock().unwrap();
+
+            if let Some(img) = &entry_content.page_image {
+                assert!(!entry_content.apply_pending);
+                page_img = img.clone();
+            } else if entry_content.wal_record.is_some() {
+                //
+                // If this page needs to be reconstructed by applying some WAL,
+                // send a request to the WAL redo thread.
+                //
+                if !entry_content.apply_pending {
+                    assert!(!entry_content.apply_pending); // always true here: guarded by the `if`
+                    entry_content.apply_pending = true;
+
+                    let s = &self.walredo_sender;
+                    s.send(entry_rc.clone())?;
+                }
+
+                while entry_content.apply_pending {
+                    entry_content = entry_rc.walredo_condvar.wait(entry_content).unwrap();
+                }
+
+                // We should now have a page image. If we don't, it means that WAL redo
+                // failed to reconstruct it. WAL redo should've logged that error already.
+                page_img = match &entry_content.page_image {
+                    Some(p) => p.clone(),
+                    None => {
+                        error!(
+                            "could not apply WAL to reconstruct page image for GetPage@LSN request"
+                        );
+                        return Err("could not apply WAL to reconstruct page image".into());
+                    }
+                };
+            } else {
+                // No base image, and no WAL record. Huh?
+                return Err(format!("no page image or WAL record for requested page"))?;
+            }
+        }
+
+        // FIXME: assumes little-endian. Only used for the debugging log though
+        let page_lsn_hi = u32::from_le_bytes(page_img.get(0..4).unwrap().try_into().unwrap());
+        let page_lsn_lo = u32::from_le_bytes(page_img.get(4..8).unwrap().try_into().unwrap());
+        trace!(
+            "Returning page with LSN {:X}/{:X} for {}/{}/{}.{} blk {}",
+            page_lsn_hi,
+            page_lsn_lo,
+            tag.spcnode,
+            tag.dbnode,
+            tag.relnode,
+            tag.forknum,
+            tag.blknum
+        );
+
+        return Ok(page_img);
+    }
+
+    //
+    // Gather what is needed to rebuild the page version held by `entry`.
+    //
+    // Returns the closest older page image for the same block (if the scan
+    // reaches one before a will_init record) and the WAL records to apply on
+    // top of it, ordered oldest first.
+    //
+    pub fn collect_records_for_apply(&self, entry: &CacheEntry) -> (Option<Bytes>, Vec<WALRecord>) {
+        let tag = entry.key.tag;
+        let oldest = CacheKey {
+            tag,
+            lsn: 0,
+        };
+        let newest = CacheKey {
+            tag,
+            lsn: entry.key.lsn,
+        };
+
+        // The map lock is held for the whole scan.
+        let shared = self.shared.lock().unwrap();
+        let versions = shared.pagecache.range((Included(&oldest), Included(&newest)));
+
+        // the last entry in the range should be the CacheEntry we were given
+        //let _last_entry = entries.next_back();
+        //assert!(last_entry == entry);
+        let mut base_img: Option<Bytes> = None;
+        let mut records: Vec<WALRecord> = Vec::new();
+
+        // Walk from the given entry towards older versions.
+        for (_key, version) in versions.rev() {
+            let content = version.content.lock().unwrap();
+
+            match (&content.page_image, &content.wal_record) {
+                // A page image is a complete base: nothing older is needed.
+                (Some(img), _) => {
+                    base_img = Some(img.clone());
+                    break;
+                }
+                (None, Some(rec)) => {
+                    records.push(rec.clone());
+
+                    // A will_init record rebuilds the page from scratch: stop here.
+                    if rec.will_init {
+                        break;
+                    }
+                }
+                (None, None) => panic!("no base image and no WAL record on cache entry"),
+            }
+        }
+
+        // Collected newest-first; hand them back oldest-first.
+        records.reverse();
+        (base_img, records)
+    }
+
+    /// Add a WAL record for page `tag` to the page cache, keyed by the page
+    /// tag and the record's LSN.
+    ///
+    /// Also grows the cached relation size so that it covers `tag.blknum`.
+    /// If the cache already held an entry under the same key, that entry is
+    /// replaced and an error is logged; the entry and WAL-record counters are
+    /// incremented in either case.
+    pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
+        let key = CacheKey { tag, lsn: rec.lsn };
+
+        // Fill in the new entry before taking the cache-wide lock.
+        let new_entry = CacheEntry::new(key.clone());
+        new_entry.content.lock().unwrap().wal_record = Some(rec);
+
+        let mut shared = self.shared.lock().unwrap();
+
+        // The relation must be at least big enough to contain this block.
+        let relsize = shared
+            .relsize_cache
+            .entry(RelTag {
+                spcnode: tag.spcnode,
+                dbnode: tag.dbnode,
+                relnode: tag.relnode,
+                forknum: tag.forknum,
+            })
+            .or_insert(0);
+        if tag.blknum >= *relsize {
+            *relsize = tag.blknum + 1;
+        }
+
+        trace!("put_wal_record lsn: {}", key.lsn);
+
+        let replaced = shared.pagecache.insert(key, Arc::new(new_entry));
+        self.num_entries.fetch_add(1, Ordering::Relaxed);
+
+        if replaced.is_some() {
+            error!("overwriting WAL record in page cache");
+        }
+
+        self.num_wal_records.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Memorize a full image of page `tag` as of `lsn`.
+    ///
+    /// Panics if the cache already held an entry for the same page and LSN.
+    /// The check runs after the insert, so by then the old entry has already
+    /// been replaced and the entry counter incremented.
+    pub fn put_page_image(&self, tag: BufferTag, lsn: u64, img: Bytes) {
+        let key = CacheKey { tag, lsn };
+
+        // Fill in the new entry before taking the cache-wide lock.
+        let new_entry = CacheEntry::new(key.clone());
+        new_entry.content.lock().unwrap().page_image = Some(img);
+
+        let mut shared = self.shared.lock().unwrap();
+
+        let oldentry = shared.pagecache.insert(key, Arc::new(new_entry));
+        self.num_entries.fetch_add(1, Ordering::Relaxed);
+        assert!(oldentry.is_none());
+
+        debug!(
+            "inserted page image for {}/{}/{}_{} blk {} at {}",
+            tag.spcnode, tag.dbnode, tag.relnode, tag.forknum, tag.blknum, lsn
+        );
+
+        self.num_page_images.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Move `last_valid_lsn` forward to `lsn` and wake up every thread that is
+    /// waiting on `valid_lsn_condvar`.
+    ///
+    /// `last_record_lsn` is left untouched. Panics if `lsn` is lower than the
+    /// current `last_valid_lsn`.
+    pub fn advance_last_valid_lsn(&self, lsn: u64) {
+        let mut shared = self.shared.lock().unwrap();
+
+        // Can't move backwards.
+        assert!(lsn >= shared.last_valid_lsn);
+
+        shared.last_valid_lsn = lsn;
+        self.valid_lsn_condvar.notify_all();
+
+        // Mirror the new value into the atomic that get_stats() reads.
+        self.last_valid_lsn.store(lsn, Ordering::Relaxed);
+    }
+
+    /// Move `last_record_lsn` forward to `lsn`.
+    ///
+    /// NOTE: this updates `last_valid_lsn` as well, and wakes up every thread
+    /// that is waiting on `valid_lsn_condvar`.
+    ///
+    /// Panics if `lsn` is lower than the current `last_valid_lsn` or
+    /// `last_record_lsn`.
+    pub fn advance_last_record_lsn(&self, lsn: u64) {
+        let mut shared = self.shared.lock().unwrap();
+
+        // Can't move backwards.
+        assert!(lsn >= shared.last_valid_lsn);
+        assert!(lsn >= shared.last_record_lsn);
+
+        shared.last_valid_lsn = lsn;
+        shared.last_record_lsn = lsn;
+        self.valid_lsn_condvar.notify_all();
+
+        // Mirror both values into the atomics that get_stats() reads. Each
+        // LSN has its own atomic; storing only last_valid_lsn would leave the
+        // reported last_record_lsn stale.
+        self.last_valid_lsn.store(lsn, Ordering::Relaxed);
+        self.last_record_lsn.store(lsn, Ordering::Relaxed);
+    }
+
+    /// Move `first_valid_lsn` forward to `lsn`.
+    ///
+    /// Panics if `lsn` is lower than the current `first_valid_lsn`, or if
+    /// `last_valid_lsn` is set (non-zero) and `lsn` is not below it.
+    pub fn _advance_first_valid_lsn(&self, lsn: u64) {
+        let mut shared = self.shared.lock().unwrap();
+
+        // Can't move backwards.
+        assert!(lsn >= shared.first_valid_lsn);
+
+        // Can't overtake last_valid_lsn (except when we're initializing the
+        // system and last_valid_lsn hasn't been set yet, i.e. is still 0).
+        assert!(shared.last_valid_lsn == 0 || lsn < shared.last_valid_lsn);
+
+        shared.first_valid_lsn = lsn;
+        // Mirror the new value into the atomic that get_stats() reads.
+        self.first_valid_lsn.store(lsn, Ordering::Relaxed);
+    }
+
+    /// Set `first_valid_lsn`, `last_valid_lsn` and `last_record_lsn` to `lsn`
+    /// in one step.
+    ///
+    /// Panics unless all three are still 0, so this can only succeed while
+    /// none of them has been set to a non-zero value.
+    pub fn init_valid_lsn(&self, lsn: u64) {
+        let mut shared = self.shared.lock().unwrap();
+
+        // None of the LSNs may have been set yet.
+        assert!(shared.first_valid_lsn == 0);
+        assert!(shared.last_valid_lsn == 0);
+        assert!(shared.last_record_lsn == 0);
+
+        shared.first_valid_lsn = lsn;
+        shared.last_valid_lsn = lsn;
+        shared.last_record_lsn = lsn;
+
+        // Mirror the new values into the atomics that get_stats() reads.
+        self.first_valid_lsn.store(lsn, Ordering::Relaxed);
+        self.last_valid_lsn.store(lsn, Ordering::Relaxed);
+        self.last_record_lsn.store(lsn, Ordering::Relaxed);
+    }
+
+    /// Return the LSN currently stored in `last_record_lsn`.
+    ///
+    /// NOTE(review): despite its name, this reads `last_record_lsn`, not
+    /// `last_valid_lsn`. The two can differ once `advance_last_valid_lsn` has
+    /// been called; confirm which value the callers expect.
+    pub fn get_last_valid_lsn(&self) -> u64 {
+        self.shared.lock().unwrap().last_record_lsn
+    }
+
+    /// Replace the control file data kept in the shared state with `c`.
+    pub fn set_controldata(&self, c: controlfile::ControlFileDataZenith) {
+        self.shared.lock().unwrap().controldata = c;
+    }
+
+    /// Return a copy of the control file data kept in the shared state.
+    pub fn get_controldata(&self) -> controlfile::ControlFileDataZenith {
+        self.shared.lock().unwrap().controldata.clone()
+    }
+
+    /// Simple test function for the WAL redo code, for quick testing of
+    /// `get_page_at_lsn()`:
+    ///
+    /// 1. Pick an entry of the page cache at random and take its page tag.
+    /// 2. Request that page with GetPage@LSN at a very high LSN, i.e. ask for
+    ///    the latest page version, so that all of its WAL gets applied.
+    ///
+    /// Returns early if the page cache is empty. The fetched page image is
+    /// discarded; a failure is only logged.
+    pub fn _test_get_page_at_lsn(&self) {
+        // Choose the page while holding the cache lock; the lock is released
+        // at the end of this block, before the request is issued.
+        let picked: Option<BufferTag> = {
+            let shared = self.shared.lock().unwrap();
+            let pagecache = &shared.pagecache;
+
+            if pagecache.is_empty() {
+                info!("page cache is empty");
+                return;
+            }
+
+            // Take the nth key of the map, where n is picked at random.
+            let n = rand::thread_rng().gen_range(0..pagecache.len());
+            let nth_tag = pagecache.keys().nth(n).map(|key| key.tag);
+            nth_tag
+        };
+        let tag = picked.unwrap();
+
+        info!("testing GetPage@LSN for block {}", tag.blknum);
+        if let Err(error) = self.get_page_at_lsn(tag, 0xffff_ffff_ffff_eeee) {
+            error!("GetPage@LSN failed: {}", error);
+        }
+    }
+
+    /// Make sure relation `rel` has an entry in the relsize cache and, when
+    /// `to` is given, that its size covers block number `to`.
+    ///
+    /// A missing entry is created with size 0. The size is only ever raised
+    /// here (to `to + 1`), never lowered.
+    ///
+    /// FIXME: Shouldn't relation size also be tracked with an LSN?
+    /// If a replica is lagging behind, it needs to get the size as it was on
+    /// the replica's current replay LSN.
+    pub fn relsize_inc(&self, rel: &RelTag, to: Option<u32>) {
+        let mut shared = self.shared.lock().unwrap();
+        let size = shared.relsize_cache.entry(*rel).or_insert(0);
+
+        match to {
+            Some(blknum) if blknum >= *size => *size = blknum + 1,
+            _ => {}
+        }
+        trace!("relsize_inc {:?} to {}", rel, size);
+    }
+
+    /// Return the cached size of relation `rel`, or 0 if the relation has no
+    /// entry in the relsize cache.
+    ///
+    /// This is a pure lookup: it does not create an entry for an unknown
+    /// relation, so asking for a size cannot make `relsize_exist` start
+    /// returning true for that relation.
+    pub fn relsize_get(&self, rel: &RelTag) -> u32 {
+        let shared = self.shared.lock().unwrap();
+        shared.relsize_cache.get(rel).copied().unwrap_or(0)
+    }
+
+    /// Tell whether the relsize cache has an entry for relation `rel`.
+    pub fn relsize_exist(&self, rel: &RelTag) -> bool {
+        self.shared.lock().unwrap().relsize_cache.contains_key(rel)
+    }
+
+    /// Take a snapshot of this page cache's counters and LSN positions.
+    ///
+    /// Every field is loaded separately from its atomic with relaxed
+    /// ordering, so the snapshot as a whole is not taken atomically.
+    pub fn get_stats(&self) -> PageCacheStats {
+        let num_entries = self.num_entries.load(Ordering::Relaxed);
+        let num_page_images = self.num_page_images.load(Ordering::Relaxed);
+        let num_wal_records = self.num_wal_records.load(Ordering::Relaxed);
+        let num_getpage_requests = self.num_getpage_requests.load(Ordering::Relaxed);
+        let first_valid_lsn = self.first_valid_lsn.load(Ordering::Relaxed);
+        let last_valid_lsn = self.last_valid_lsn.load(Ordering::Relaxed);
+        let last_record_lsn = self.last_record_lsn.load(Ordering::Relaxed);
+
+        PageCacheStats {
+            num_entries,
+            num_page_images,
+            num_wal_records,
+            num_getpage_requests,
+            first_valid_lsn,
+            last_valid_lsn,
+            last_record_lsn,
+        }
+    }
+}
+
+/// Sum up the statistics of all page caches held in `PAGECACHES`.
+///
+/// Starts from an all-zero `PageCacheStats` and adds each cache's
+/// `get_stats()` result to it with `+=`.
+pub fn get_stats() -> PageCacheStats {
+    let mut total = PageCacheStats {
+        num_entries: 0,
+        num_page_images: 0,
+        num_wal_records: 0,
+        num_getpage_requests: 0,
+        first_valid_lsn: 0,
+        last_valid_lsn: 0,
+        last_record_lsn: 0,
+    };
+
+    let pcaches = PAGECACHES.lock().unwrap();
+    for (_sys_id, pcache) in pcaches.iter() {
+        total += pcache.get_stats();
+    }
+    total
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
--- a/pageserver/src/pg_constants.rs
+++ b/pageserver/src/pg_constants.rs
@@ -0,0 +1,11 @@
+// Tablespace OIDs, from pg_tablespace_d.h
+//
+pub const DEFAULTTABLESPACE_OID: u32 = 1663;
+pub const GLOBALTABLESPACE_OID: u32 = 1664;
+// Special forknum values used in the tags of non-relation files.
+// TODO: maybe use an enum instead?
+pub const PG_CONTROLFILE_FORKNUM: u32 = 42;
+pub const PG_FILENODEMAP_FORKNUM: u32 = 43;
+pub const PG_XACT_FORKNUM: u32 = 44;
+pub const PG_MXACT_OFFSETS_FORKNUM: u32 = 45;
+pub const PG_MXACT_MEMBERS_FORKNUM: u32 = 46;
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -1,532 +0,0 @@
-pub mod rocksdb;
-
-use crate::waldecoder::{DecodedWALRecord, Oid, TransactionId, XlCreateDatabase, XlSmgrTruncate};
-use crate::ZTimelineId;
-use anyhow::Result;
-use bytes::{Buf, BufMut, Bytes, BytesMut};
-use postgres_ffi::pg_constants;
-use postgres_ffi::relfile_utils::forknumber_to_name;
-use std::fmt;
-use std::sync::Arc;
-use zenith_utils::lsn::Lsn;
-
-///
-/// A repository corresponds to one .zenith directory. One repository holds multiple
-/// timelines, forked off from the same initial call to 'initdb'.
-pub trait Repository {
-    /// Get Timeline handle for given zenith timeline ID.
-    ///
-    /// The Timeline is expected to be already "open", i.e. `get_or_restore_timeline`
-    /// should've been called on it earlier already.
-    fn get_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
-
-    /// Get Timeline handle for given zenith timeline ID.
-    ///
-    /// Creates a new Timeline object if it's not "open" already.
-    fn get_or_restore_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
-
-    /// Create an empty timeline, without loading any data into it from possible on-disk snapshot.
-    ///
-    /// For unit tests.
-    #[cfg(test)]
-    fn create_empty_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
-
-    //fn get_stats(&self) -> RepositoryStats;
-}
-
-pub trait Timeline {
-    //------------------------------------------------------------------------------
-    // Public GET functions
-    //------------------------------------------------------------------------------
-
-    /// Look up given page in the cache.
-    fn get_page_at_lsn(&self, tag: BufferTag, lsn: Lsn) -> Result<Bytes>;
-
-    /// Get size of relation
-    fn get_relsize(&self, tag: RelTag, lsn: Lsn) -> Result<u32>;
-
-    /// Does relation exist?
-    fn get_relsize_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool>;
-
-    /// Get page image at the particular LSN
-    fn get_page_image(&self, tag: BufferTag, lsn: Lsn) -> Result<Option<Bytes>>;
-
-    //------------------------------------------------------------------------------
-    // Public PUT functions, to update the repository with new page versions.
-    //
-    // These are called by the WAL receiver to digest WAL records.
-    //------------------------------------------------------------------------------
-
-    /// Put a new page version that can be constructed from a WAL record
-    ///
-    /// This will implicitly extend the relation, if the page is beyond the
-    /// current end-of-file.
-    fn put_wal_record(&self, tag: BufferTag, rec: WALRecord);
-
-    /// Like put_wal_record, but with ready-made image of the page.
-    fn put_page_image(&self, tag: BufferTag, lsn: Lsn, img: Bytes);
-
-    /// Truncate relation
-    fn put_truncation(&self, rel: RelTag, lsn: Lsn, nblocks: u32) -> Result<()>;
-
-    /// Create a new database from a template database
-    ///
-    /// In PostgreSQL, CREATE DATABASE works by scanning the data directory and
-    /// copying all relation files from the template database. This is the equivalent
-    /// of that.
-    fn put_create_database(
-        &self,
-        lsn: Lsn,
-        db_id: Oid,
-        tablespace_id: Oid,
-        src_db_id: Oid,
-        src_tablespace_id: Oid,
-    ) -> Result<()>;
-
-    ///
-    /// Helper function to parse a WAL record and call the above functions for all the
-    /// relations/pages that the record affects.
-    ///
-    fn save_decoded_record(
-        &self,
-        decoded: DecodedWALRecord,
-        recdata: Bytes,
-        lsn: Lsn,
-    ) -> Result<()> {
-        // Figure out which blocks the record applies to, and "put" a separate copy
-        // of the record for each block.
-        for blk in decoded.blocks.iter() {
-            let tag = BufferTag {
-                rel: RelTag {
-                    spcnode: blk.rnode_spcnode,
-                    dbnode: blk.rnode_dbnode,
-                    relnode: blk.rnode_relnode,
-                    forknum: blk.forknum as u8,
-                },
-                blknum: blk.blkno,
-            };
-
-            let rec = WALRecord {
-                lsn,
-                will_init: blk.will_init || blk.apply_image,
-                rec: recdata.clone(),
-                main_data_offset: decoded.main_data_offset as u32,
-            };
-
-            self.put_wal_record(tag, rec);
-        }
-
-        // Handle a few special record types
-        if decoded.xl_rmid == pg_constants::RM_SMGR_ID
-            && (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
-                == pg_constants::XLOG_SMGR_TRUNCATE
-        {
-            let truncate = XlSmgrTruncate::decode(&decoded);
-            if (truncate.flags & pg_constants::SMGR_TRUNCATE_HEAP) != 0 {
-                let rel = RelTag {
-                    spcnode: truncate.rnode.spcnode,
-                    dbnode: truncate.rnode.dbnode,
-                    relnode: truncate.rnode.relnode,
-                    forknum: pg_constants::MAIN_FORKNUM,
-                };
-                self.put_truncation(rel, lsn, truncate.blkno)?;
-            }
-        } else if decoded.xl_rmid == pg_constants::RM_DBASE_ID
-            && (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
-                == pg_constants::XLOG_DBASE_CREATE
-        {
-            let createdb = XlCreateDatabase::decode(&decoded);
-            self.put_create_database(
-                lsn,
-                createdb.db_id,
-                createdb.tablespace_id,
-                createdb.src_db_id,
-                createdb.src_tablespace_id,
-            )?;
-        }
-        // Now that this record has been handled, let the repository know that
-        // it is up-to-date to this LSN
-        self.advance_last_record_lsn(lsn);
-        Ok(())
-    }
-
-    /// Remember the all WAL before the given LSN has been processed.
-    ///
-    /// The WAL receiver calls this after the put_* functions, to indicate that
-    /// all WAL before this point has been digested. Before that, if you call
-    /// GET on an earlier LSN, it will block.
-    fn advance_last_valid_lsn(&self, lsn: Lsn);
-    fn get_last_valid_lsn(&self) -> Lsn;
-    fn init_valid_lsn(&self, lsn: Lsn);
-
-    /// Like `advance_last_valid_lsn`, but this always points to the end of
-    /// a WAL record, not in the middle of one.
-    ///
-    /// This must be <= last valid LSN. This is tracked separately from last
-    /// valid LSN, so that the WAL receiver knows where to restart streaming.
-    fn advance_last_record_lsn(&self, lsn: Lsn);
-    fn get_last_record_lsn(&self) -> Lsn;
-
-    /// Get range [begin,end) of stored blocks. Used mostly for SMGR pseudorelations
-    /// but can be also applied to normal relations.
-    fn get_range(&self, rel: RelTag, lsn: Lsn) -> Result<(u32, u32)>;
-
-    /// Get vector of databases (represented using RelTag only dbnode and spcnode fields are used)
-    fn get_databases(&self, lsn: Lsn) -> Result<Vec<RelTag>>;
-
-    /// Get vector of prepared twophase transactions
-    fn get_twophase(&self, lsn: Lsn) -> Result<Vec<TransactionId>>;
-}
-
-#[derive(Clone)]
-pub struct RepositoryStats {
-    pub num_entries: Lsn,
-    pub num_page_images: Lsn,
-    pub num_wal_records: Lsn,
-    pub num_getpage_requests: Lsn,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Hash, Ord, Clone, Copy)]
-pub struct RelTag {
-    pub forknum: u8,
-    pub spcnode: u32,
-    pub dbnode: u32,
-    pub relnode: u32,
-}
-
-impl RelTag {
-    pub fn pack(&self, buf: &mut BytesMut) {
-        buf.put_u8(self.forknum);
-        buf.put_u32(self.spcnode);
-        buf.put_u32(self.dbnode);
-        buf.put_u32(self.relnode);
-    }
-    pub fn unpack(buf: &mut Bytes) -> RelTag {
-        RelTag {
-            forknum: buf.get_u8(),
-            spcnode: buf.get_u32(),
-            dbnode: buf.get_u32(),
-            relnode: buf.get_u32(),
-        }
-    }
-}
-
-/// Display RelTag in the same format that's used in most PostgreSQL debug messages:
-///
-/// <spcnode>/<dbnode>/<relnode>[_fsm|_vm|_init]
-///
-impl fmt::Display for RelTag {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if let Some(forkname) = forknumber_to_name(self.forknum) {
-            write!(
-                f,
-                "{}/{}/{}_{}",
-                self.spcnode, self.dbnode, self.relnode, forkname
-            )
-        } else {
-            write!(f, "{}/{}/{}", self.spcnode, self.dbnode, self.relnode)
-        }
-    }
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
-pub struct BufferTag {
-    pub rel: RelTag,
-    pub blknum: u32,
-}
-
-impl BufferTag {
-    pub fn fork(forknum: u8) -> BufferTag {
-        BufferTag {
-            rel: RelTag {
-                forknum,
-                spcnode: 0,
-                dbnode: 0,
-                relnode: 0,
-            },
-            blknum: 0,
-        }
-    }
-
-    pub fn pack(&self, buf: &mut BytesMut) {
-        self.rel.pack(buf);
-        buf.put_u32(self.blknum);
-    }
-    pub fn unpack(buf: &mut Bytes) -> BufferTag {
-        BufferTag {
-            rel: RelTag::unpack(buf),
-            blknum: buf.get_u32(),
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct WALRecord {
-    pub lsn: Lsn, // LSN at the *end* of the record
-    pub will_init: bool,
-    pub rec: Bytes,
-    // Remember the offset of main_data in rec,
-    // so that we don't have to parse the record again.
-    // If record has no main_data, this offset equals rec.len().
-    pub main_data_offset: u32,
-}
-
-impl WALRecord {
-    pub fn pack(&self, buf: &mut BytesMut) {
-        buf.put_u64(self.lsn.0);
-        buf.put_u8(self.will_init as u8);
-        buf.put_u32(self.main_data_offset);
-        buf.put_u32(self.rec.len() as u32);
-        buf.put_slice(&self.rec[..]);
-    }
-    pub fn unpack(buf: &mut Bytes) -> WALRecord {
-        let lsn = Lsn::from(buf.get_u64());
-        let will_init = buf.get_u8() != 0;
-        let main_data_offset = buf.get_u32();
-        let mut dst = vec![0u8; buf.get_u32() as usize];
-        buf.copy_to_slice(&mut dst);
-        WALRecord {
-            lsn,
-            will_init,
-            rec: Bytes::from(dst),
-            main_data_offset,
-        }
-    }
-}
-
-///
-/// Tests that should work the same with any Repository/Timeline implementation.
-///
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::walredo::{WalRedoError, WalRedoManager};
-    use crate::PageServerConf;
-    use postgres_ffi::pg_constants;
-    use std::fs;
-    use std::path::PathBuf;
-    use std::str::FromStr;
-    use std::time::Duration;
-
-    /// Arbitrary relation tag, for testing.
-    const TESTREL_A: RelTag = RelTag {
-        spcnode: 0,
-        dbnode: 111,
-        relnode: 1000,
-        forknum: 0,
-    };
-
-    /// Convenience function to create a BufferTag for testing.
-    /// Helps to keeps the tests shorter.
-    #[allow(non_snake_case)]
-    fn TEST_BUF(blknum: u32) -> BufferTag {
-        BufferTag {
-            rel: TESTREL_A,
-            blknum,
-        }
-    }
-
-    /// Convenience function to create a page image with given string as the only content
-    #[allow(non_snake_case)]
-    fn TEST_IMG(s: &str) -> Bytes {
-        let mut buf = BytesMut::new();
-        buf.extend_from_slice(s.as_bytes());
-        buf.resize(8192, 0);
-
-        buf.freeze()
-    }
-
-    fn get_test_repo(test_name: &str) -> Result<Box<dyn Repository>> {
-        let repo_dir = PathBuf::from(format!("../tmp_check/test_{}", test_name));
-        let _ = fs::remove_dir_all(&repo_dir);
-        fs::create_dir_all(&repo_dir)?;
-
-        let conf = PageServerConf {
-            daemonize: false,
-            interactive: false,
-            gc_horizon: 64 * 1024 * 1024,
-            gc_period: Duration::from_secs(10),
-            listen_addr: "127.0.0.1:5430".parse().unwrap(),
-            workdir: repo_dir.into(),
-            pg_distrib_dir: "".into(),
-        };
-        // Make a static copy of the config. This can never be free'd, but that's
-        // OK in a test.
-        let conf: &'static PageServerConf = Box::leak(Box::new(conf));
-
-        let walredo_mgr = TestRedoManager {};
-
-        let repo = rocksdb::RocksRepository::new(conf, Arc::new(walredo_mgr));
-
-        Ok(Box::new(repo))
-    }
-
-    /// Test get_relsize() and truncation.
-    ///
-    /// FIXME: The RocksRepository implementation returns wrong relation size, if
-    /// you make a request with an old LSN. It seems to ignore the requested LSN
-    /// and always return result as of latest LSN. For such cases, the expected
-    /// results below match the current RocksRepository behavior, so that the test
-    /// passes, and the actually correct answers are in comments like
-    /// "// CORRECT: <correct answer>"
-    #[test]
-    fn test_relsize() -> Result<()> {
-        // get_timeline() with non-existent timeline id should fail
-        //repo.get_timeline("11223344556677881122334455667788");
-
-        // Create timeline to work on
-        let repo = get_test_repo("test_relsize")?;
-        let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
-        let tline = repo.create_empty_timeline(timelineid)?;
-
-        tline.init_valid_lsn(Lsn(1));
-        tline.put_page_image(TEST_BUF(0), Lsn(2), TEST_IMG("foo blk 0 at 2"));
-        tline.put_page_image(TEST_BUF(0), Lsn(2), TEST_IMG("foo blk 0 at 2"));
-        tline.put_page_image(TEST_BUF(0), Lsn(3), TEST_IMG("foo blk 0 at 3"));
-        tline.put_page_image(TEST_BUF(1), Lsn(4), TEST_IMG("foo blk 1 at 4"));
-        tline.put_page_image(TEST_BUF(2), Lsn(5), TEST_IMG("foo blk 2 at 5"));
-
-        tline.advance_last_valid_lsn(Lsn(5));
-
-        // rocksdb implementation erroneosly returns 'true' here
-        assert_eq!(tline.get_relsize_exists(TESTREL_A, Lsn(1))?, true); // CORRECT: false
-                                                                        // likewise, it returns wrong size here
-        assert_eq!(tline.get_relsize(TESTREL_A, Lsn(1))?, 3); // CORRECT: 0 (or error?)
-
-        assert_eq!(tline.get_relsize_exists(TESTREL_A, Lsn(2))?, true);
-        assert_eq!(tline.get_relsize(TESTREL_A, Lsn(2))?, 3); // CORRECT: 1
-        assert_eq!(tline.get_relsize(TESTREL_A, Lsn(5))?, 3);
-
-        // Check page contents at each LSN
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(0), Lsn(2))?,
-            TEST_IMG("foo blk 0 at 2")
-        );
-
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(0), Lsn(3))?,
-            TEST_IMG("foo blk 0 at 3")
-        );
-
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(0), Lsn(4))?,
-            TEST_IMG("foo blk 0 at 3")
-        );
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(1), Lsn(4))?,
-            TEST_IMG("foo blk 1 at 4")
-        );
-
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(0), Lsn(5))?,
-            TEST_IMG("foo blk 0 at 3")
-        );
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(1), Lsn(5))?,
-            TEST_IMG("foo blk 1 at 4")
-        );
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(2), Lsn(5))?,
-            TEST_IMG("foo blk 2 at 5")
-        );
-
-        // Truncate last block
-        tline.put_truncation(TESTREL_A, Lsn(6), 2)?;
-        tline.advance_last_valid_lsn(Lsn(6));
-
-        // Check reported size and contents after truncation
-        assert_eq!(tline.get_relsize(TESTREL_A, Lsn(6))?, 2);
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(0), Lsn(6))?,
-            TEST_IMG("foo blk 0 at 3")
-        );
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(1), Lsn(6))?,
-            TEST_IMG("foo blk 1 at 4")
-        );
-
-        // should still see the truncated block with older LSN
-        assert_eq!(tline.get_relsize(TESTREL_A, Lsn(5))?, 2); // CORRECT: 3
-        assert_eq!(
-            tline.get_page_at_lsn(TEST_BUF(2), Lsn(5))?,
-            TEST_IMG("foo blk 2 at 5")
-        );
-
-        Ok(())
-    }
-
-    /// Test get_relsize() and truncation with a file larger than 1 GB, so that it's
-    /// split into multiple 1 GB segments in Postgres.
-    ///
-    /// This isn't very interesting with the RocksDb implementation, as we don't pay
-    /// any attention to Postgres segment boundaries there.
-    #[test]
-    fn test_large_rel() -> Result<()> {
-        let repo = get_test_repo("test_large_rel")?;
-        let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
-        let tline = repo.create_empty_timeline(timelineid)?;
-
-        tline.init_valid_lsn(Lsn(1));
-
-        let mut lsn = 0;
-        for i in 0..pg_constants::RELSEG_SIZE + 1 {
-            let img = TEST_IMG(&format!("foo blk {} at {}", i, Lsn(lsn)));
-            lsn += 1;
-            tline.put_page_image(TEST_BUF(i as u32), Lsn(lsn), img);
-        }
-        tline.advance_last_valid_lsn(Lsn(lsn));
-
-        assert_eq!(
-            tline.get_relsize(TESTREL_A, Lsn(lsn))?,
-            pg_constants::RELSEG_SIZE + 1
-        );
-
-        // Truncate one block
-        lsn += 1;
-        tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE)?;
-        tline.advance_last_valid_lsn(Lsn(lsn));
-        assert_eq!(
-            tline.get_relsize(TESTREL_A, Lsn(lsn))?,
-            pg_constants::RELSEG_SIZE
-        );
-
-        // Truncate another block
-        lsn += 1;
-        tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE - 1)?;
-        tline.advance_last_valid_lsn(Lsn(lsn));
-        assert_eq!(
-            tline.get_relsize(TESTREL_A, Lsn(lsn))?,
-            pg_constants::RELSEG_SIZE - 1
-        );
-
-        Ok(())
-    }
-
-    // Mock WAL redo manager that doesn't do much
-    struct TestRedoManager {}
-
-    impl WalRedoManager for TestRedoManager {
-        fn request_redo(
-            &self,
-            tag: BufferTag,
-            lsn: Lsn,
-            base_img: Option<Bytes>,
-            records: Vec<WALRecord>,
-        ) -> Result<Bytes, WalRedoError> {
-            let s = format!(
-                "redo for rel {} blk {} to get to {}, with {} and {} records",
-                tag.rel,
-                tag.blknum,
-                lsn,
-                if base_img.is_some() {
-                    "base image"
-                } else {
-                    "no base image"
-                },
-                records.len()
-            );
-            println!("{}", s);
-            Ok(TEST_IMG(&s))
-        }
-    }
-}
--- a/pageserver/src/repository/rocksdb.rs
+++ b/pageserver/src/repository/rocksdb.rs
@@ -1,978 +0,0 @@
-//
-// A Repository holds all the different page versions and WAL records
-//
-// This implementation uses RocksDB to store WAL records and
-// full page images, keyed by the RelFileNode, blocknumber, and the
-// LSN.
-
-use crate::repository::{BufferTag, RelTag, Repository, Timeline, WALRecord};
-use crate::restore_local_repo::restore_timeline;
-use crate::waldecoder::{Oid, TransactionId};
-use crate::walredo::WalRedoManager;
-use crate::PageServerConf;
-use crate::ZTimelineId;
-// use crate::PageServerConf;
-// use crate::branches;
-use anyhow::{bail, Context, Result};
-use bytes::{Buf, BufMut, Bytes, BytesMut};
-use log::*;
-use postgres_ffi::nonrelfile_utils::transaction_id_get_status;
-use postgres_ffi::*;
-use std::cmp::min;
-use std::collections::HashMap;
-use std::convert::TryInto;
-use std::sync::atomic::AtomicU64;
-use std::sync::atomic::Ordering;
-use std::sync::{Arc, Mutex};
-use std::thread;
-use std::time::{Duration, Instant};
-use zenith_utils::lsn::{AtomicLsn, Lsn};
-use zenith_utils::seqwait::SeqWait;
-
-// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
-static TIMEOUT: Duration = Duration::from_secs(60);
-
-// Repository implementation backed by RocksDB. It hands out RocksTimeline
-// objects, one per timeline ID.
-pub struct RocksRepository {
-    // Page server configuration; passed on to each timeline and its GC thread.
-    conf: &'static PageServerConf,
-    // Timelines opened so far, keyed by timeline ID.
-    timelines: Mutex<HashMap<ZTimelineId, Arc<RocksTimeline>>>,
-
-    // WAL redo manager, a clone of which is given to every timeline created here.
-    walredo_mgr: Arc<dyn WalRedoManager>,
-}
-
-// State of a single timeline: the RocksDB store holding its page versions,
-// the WAL redo manager used to reconstruct pages, the LSNs up to which WAL
-// has been received, and counters for metrics.
-pub struct RocksTimeline {
-    // RocksDB handle
-    db: rocksdb::DB,
-
-    // WAL redo manager
-    walredo_mgr: Arc<dyn WalRedoManager>,
-
-    // What page versions do we hold in the cache? If we get a request > last_valid_lsn,
-    // we need to wait until we receive all the WAL up to the request. The SeqWait
-    // provides functions for that. TODO: If we get a request for an old LSN, such that
-    // the versions have already been garbage collected away, we should throw an error,
-    // but we don't track that currently.
-    //
-    // last_record_lsn points to the end of last processed WAL record.
-    // It can lag behind last_valid_lsn, if the WAL receiver has received some WAL
-    // after the end of last record, but not the whole next record yet. In the
-    // page cache, we care about last_valid_lsn, but if the WAL receiver needs to
-    // restart the streaming, it needs to restart at the end of last record, so
-    // we track them separately. last_record_lsn should perhaps be in
-    // walreceiver.rs instead of here, but it seems convenient to keep all three
-    // values together.
-    // NOTE(review): the text above says "all three values", but only two LSNs
-    // are declared below -- confirm whether a third was removed.
-    //
-    last_valid_lsn: SeqWait<Lsn>,
-    last_record_lsn: AtomicLsn,
-
-    // Counters, for metrics collection.
-    pub num_entries: AtomicU64,
-    pub num_page_images: AtomicU64,
-    pub num_wal_records: AtomicU64,
-    pub num_getpage_requests: AtomicU64,
-}
-
-//
-// We store two kinds of entries in the repository:
-//
-// 1. Ready-made images of the block
-// 2. WAL records, to be applied on top of the "previous" entry
-//
-// Some WAL records will initialize the page from scratch. For such records,
-// the 'will_init' flag is set. They don't need the previous page image before
-// applying. The 'will_init' flag is set for records containing a full-page image,
-// and for records with the BKPBLOCK_WILL_INIT flag. These differ from PageImages
-// stored directly in the cache entry in that you still need to run the WAL redo
-// routine to generate the page image.
-//
-// Key of a repository entry: the page it belongs to plus the LSN of the
-// version. The serialized form is the packed BufferTag followed by the LSN
-// written as a big-endian u64.
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
-struct CacheKey {
-    pub tag: BufferTag,
-    pub lsn: Lsn,
-}
-
-impl CacheKey {
-    // Append the serialized key to 'buf'.
-    fn pack(&self, buf: &mut BytesMut) {
-        let CacheKey { tag, lsn } = self;
-        tag.pack(buf);
-        buf.put_u64(lsn.0);
-    }
-
-    // Read a key from the front of 'buf', consuming its bytes.
-    fn unpack(buf: &mut Bytes) -> CacheKey {
-        // The tag precedes the LSN in the serialized form, so it is read first.
-        let tag = BufferTag::unpack(buf);
-        let lsn = Lsn::from(buf.get_u64());
-        CacheKey { tag, lsn }
-    }
-
-    // Decode a key from a raw byte slice, such as one returned by an iterator.
-    fn from_slice(slice: &[u8]) -> Self {
-        Self::unpack(&mut Bytes::copy_from_slice(slice))
-    }
-
-    // Serialize the key into a freshly allocated buffer.
-    fn to_bytes(&self) -> BytesMut {
-        let mut packed = BytesMut::new();
-        self.pack(&mut packed);
-        packed
-    }
-}
-
-// Value stored in the repository under a CacheKey.
-enum CacheEntryContent {
-    // Ready-made image of the page.
-    PageImage(Bytes),
-    // WAL record, to be applied on top of the previous entry for the page.
-    WALRecord(WALRecord),
-    // Marker written by put_truncation() for every block cut off by a truncation.
-    Truncation,
-}
-
-// The serialized representation of a CacheEntryContent begins with
-// single byte that indicates what kind of entry it is. There is also
-// an UNUSED_VERSION_FLAG that is not represented in the CacheEntryContent
-// at all, you must peek into the first byte of the serialized representation
-// to read it.
-const CONTENT_PAGE_IMAGE: u8 = 1u8;
-const CONTENT_WAL_RECORD: u8 = 2u8;
-const CONTENT_TRUNCATION: u8 = 3u8;
-
-const CONTENT_KIND_MASK: u8 = 3u8; // bitmask that covers the above
-
-const UNUSED_VERSION_FLAG: u8 = 4u8;
-
-impl CacheEntryContent {
-    // Append the serialized entry to 'buf': one kind byte, followed by the
-    // payload. A page image is stored as a u16 length and the image bytes;
-    // a WAL record uses WALRecord::pack(); a truncation has no payload.
-    //
-    // Panics if a page image is longer than u16::MAX bytes, because its
-    // length would not fit into the prefix.
-    pub fn pack(&self, buf: &mut BytesMut) {
-        match self {
-            CacheEntryContent::PageImage(image) => {
-                // A plain 'as u16' cast would silently wrap the length and
-                // produce an entry that unpack() cannot decode correctly.
-                let len: u16 = image
-                    .len()
-                    .try_into()
-                    .expect("page image does not fit into 16-bit length prefix");
-                buf.put_u8(CONTENT_PAGE_IMAGE);
-                buf.put_u16(len);
-                buf.put_slice(&image[..]);
-            }
-            CacheEntryContent::WALRecord(rec) => {
-                buf.put_u8(CONTENT_WAL_RECORD);
-                rec.pack(buf);
-            }
-            CacheEntryContent::Truncation => {
-                buf.put_u8(CONTENT_TRUNCATION);
-            }
-        }
-    }
-
-    // Decode an entry from the front of 'buf', consuming its bytes.
-    //
-    // Only the kind bits of the first byte are looked at; flag bits such as
-    // UNUSED_VERSION_FLAG are masked off and not reported to the caller.
-    //
-    // Panics if the kind bits do not name a known entry kind.
-    pub fn unpack(buf: &mut Bytes) -> CacheEntryContent {
-        let kind = buf.get_u8() & CONTENT_KIND_MASK;
-
-        match kind {
-            CONTENT_PAGE_IMAGE => {
-                let len = buf.get_u16() as usize;
-                let mut dst = vec![0u8; len];
-                buf.copy_to_slice(&mut dst);
-                CacheEntryContent::PageImage(Bytes::from(dst))
-            }
-            CONTENT_WAL_RECORD => CacheEntryContent::WALRecord(WALRecord::unpack(buf)),
-            CONTENT_TRUNCATION => CacheEntryContent::Truncation,
-            // Kind 0 passes the mask but is never written by pack(); it can
-            // only come from a damaged or foreign value.
-            other => panic!("corrupt repository entry: unknown content kind {}", other),
-        }
-    }
-
-    // Decode an entry from a raw byte slice, such as a value read from RocksDB.
-    fn from_slice(slice: &[u8]) -> Self {
-        let mut buf = Bytes::copy_from_slice(slice);
-        Self::unpack(&mut buf)
-    }
-
-    // Serialize the entry into a freshly allocated buffer.
-    fn to_bytes(&self) -> BytesMut {
-        let mut buf = BytesMut::new();
-        self.pack(&mut buf);
-        buf
-    }
-}
-
-impl RocksRepository {
-    // Create a repository that has no timelines loaded yet.
-    pub fn new(
-        conf: &'static PageServerConf,
-        walredo_mgr: Arc<dyn WalRedoManager>,
-    ) -> RocksRepository {
-        let timelines = Mutex::new(HashMap::new());
-        RocksRepository {
-            conf,
-            timelines,
-            walredo_mgr,
-        }
-    }
-}
-
-impl Repository for RocksRepository {
-    // Get handle to a given timeline. It is assumed to already exist;
-    // an error is returned if it has not been loaded.
-    fn get_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>> {
-        let timelines = self.timelines.lock().unwrap();
-
-        if let Some(timeline) = timelines.get(&timelineid) {
-            return Ok(timeline.clone());
-        }
-        bail!("timeline not found")
-    }
-
-    // Get handle to a timeline, loading it with restore_timeline() on first
-    // use. When gc_horizon is non-zero, a garbage collection thread is
-    // started for every newly loaded timeline.
-    fn get_or_restore_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>> {
-        let mut timelines = self.timelines.lock().unwrap();
-
-        if let Some(existing) = timelines.get(&timelineid) {
-            return Ok(existing.clone());
-        }
-
-        let timeline = RocksTimeline::new(self.conf, timelineid, self.walredo_mgr.clone());
-        restore_timeline(self.conf, &timeline, timelineid)?;
-
-        let timeline_rc = Arc::new(timeline);
-        timelines.insert(timelineid, timeline_rc.clone());
-
-        if self.conf.gc_horizon != 0 {
-            let gc_timeline = timeline_rc.clone();
-            let conf = self.conf;
-            let _gc_thread = thread::Builder::new()
-                .name("Garbage collection thread".into())
-                .spawn(move || {
-                    // FIXME
-                    gc_timeline.do_gc(conf).expect("GC thread died");
-                })
-                .unwrap();
-        }
-        Ok(timeline_rc)
-    }
-
-    // Register a brand-new timeline without restoring anything into it.
-    // The timeline must not be registered yet.
-    #[cfg(test)]
-    fn create_empty_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>> {
-        let mut timelines = self.timelines.lock().unwrap();
-
-        let timeline_rc = Arc::new(RocksTimeline::new(
-            &self.conf,
-            timelineid,
-            self.walredo_mgr.clone(),
-        ));
-        let previous = timelines.insert(timelineid, timeline_rc.clone());
-        assert!(previous.is_none());
-
-        // don't start the garbage collector for unit tests, either.
-
-        Ok(timeline_rc)
-    }
-}
-
-impl RocksTimeline {
-    // Open the RocksDB database at conf.timeline_path(timelineid), creating
-    // it if it does not exist yet. Panics if the database cannot be opened.
-    fn open_rocksdb(conf: &PageServerConf, timelineid: ZTimelineId) -> rocksdb::DB {
-        let path = conf.timeline_path(timelineid);
-        let mut opts = rocksdb::Options::default();
-        opts.create_if_missing(true);
-        opts.set_use_fsync(true);
-        opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
-        // Entries whose first value byte carries UNUSED_VERSION_FLAG are
-        // dropped when RocksDB compacts them.
-        opts.set_compaction_filter("ttl", move |_level: u32, _key: &[u8], val: &[u8]| {
-            match val.first() {
-                Some(flags) if (flags & UNUSED_VERSION_FLAG) != 0 => {
-                    rocksdb::compaction_filter::Decision::Remove
-                }
-                // Live entries are kept. So are empty values: indexing val[0]
-                // on them would panic inside the compaction callback.
-                _ => rocksdb::compaction_filter::Decision::Keep,
-            }
-        });
-        rocksdb::DB::open(&opts, &path).unwrap()
-    }
-
-    // Create the in-memory state for a timeline and open its database.
-    // Both LSNs start at zero and all counters start at zero.
-    fn new(
-        conf: &'static PageServerConf,
-        timelineid: ZTimelineId,
-        walredo_mgr: Arc<dyn WalRedoManager>,
-    ) -> RocksTimeline {
-        RocksTimeline {
-            db: RocksTimeline::open_rocksdb(conf, timelineid),
-
-            walredo_mgr,
-
-            last_valid_lsn: SeqWait::new(Lsn(0)),
-            last_record_lsn: AtomicLsn::new(0),
-
-            num_entries: AtomicU64::new(0),
-            num_page_images: AtomicU64::new(0),
-            num_wal_records: AtomicU64::new(0),
-            num_getpage_requests: AtomicU64::new(0),
-        }
-    }
-}
-
-impl RocksTimeline {
-    ///
-    /// Gather what is needed to reconstruct the version of page 'tag' found
-    /// at or before 'lsn'.
-    ///
-    /// Returns the base page image, if one was found, together with the WAL
-    /// records to apply over it, ordered from oldest to newest.
-    ///
-    /// Panics if a truncation entry is encountered on the way.
-    ///
-    fn collect_records_for_apply(
-        &self,
-        tag: BufferTag,
-        lsn: Lsn,
-    ) -> (Option<Bytes>, Vec<WALRecord>) {
-        let mut base_img: Option<Bytes> = None;
-        let mut records: Vec<WALRecord> = Vec::new();
-
-        // Position on the last entry whose key is not above (tag, lsn).
-        let mut iter = self.db.raw_iterator();
-        iter.seek_for_prev(CacheKey { tag, lsn }.to_bytes());
-
-        // Walk towards older versions of the page until something that can
-        // serve as a starting point for redo is found.
-        while iter.valid() {
-            let entry_key = CacheKey::from_slice(iter.key().unwrap());
-            if entry_key.tag != tag {
-                break;
-            }
-            match CacheEntryContent::from_slice(iter.value().unwrap()) {
-                CacheEntryContent::PageImage(img) => {
-                    // A full image: nothing older is needed.
-                    base_img = Some(img);
-                    break;
-                }
-                CacheEntryContent::WALRecord(rec) => {
-                    // A record that initializes the page needs no predecessor.
-                    let will_init = rec.will_init;
-                    records.push(rec);
-                    if will_init {
-                        break;
-                    }
-                }
-                CacheEntryContent::Truncation => {
-                    panic!("no base image and no WAL record on cache entry");
-                }
-            }
-            iter.prev();
-        }
-        // The records were collected newest-first; hand them out oldest-first.
-        records.reverse();
-        (base_img, records)
-    }
-
-    // Internal functions
-
-    //
-    // Internal function to get relation size at given LSN.
-    //
-    // The caller must ensure that WAL has been received up to 'lsn'.
-    //
-    // Seeks backwards from the key (rel, u32::MAX, lsn). When the entry found
-    // is a truncation marker, the search is repeated from the block just
-    // below it. The first other entry found for 'rel' determines the size
-    // (its block number plus one); if there is none, the size is zero.
-    //
-    fn relsize_get_nowait(&self, rel: RelTag, lsn: Lsn) -> Result<u32> {
-        assert!(lsn <= self.last_valid_lsn.load());
-
-        let mut probe = CacheKey {
-            tag: BufferTag {
-                rel,
-                blknum: u32::MAX,
-            },
-            lsn,
-        };
-        let mut iter = self.db.raw_iterator();
-        loop {
-            iter.seek_for_prev(probe.to_bytes());
-            if !iter.valid() {
-                break;
-            }
-            let found = CacheKey::from_slice(iter.key().unwrap());
-            if found.tag.rel != rel {
-                break;
-            }
-            match CacheEntryContent::from_slice(iter.value().unwrap()) {
-                CacheEntryContent::Truncation => {
-                    if found.tag.blknum == 0 {
-                        // Even the first block is truncated away.
-                        break;
-                    }
-                    probe.tag.blknum = found.tag.blknum - 1;
-                }
-                _ => {
-                    let relsize = found.tag.blknum + 1;
-                    debug!("Size of relation {} at {} is {}", rel, lsn, relsize);
-                    return Ok(relsize);
-                }
-            }
-        }
-        debug!("Size of relation {} at {} is zero", rel, lsn);
-        Ok(0)
-    }
-
-    //
-    // Garbage collection loop. Called from the thread that
-    // get_or_restore_timeline() spawns for the timeline.
-    //
-    // Every 'conf.gc_period' one pass is made over all keys, from the
-    // highest key downwards. The horizon is the last valid LSN minus
-    // 'conf.gc_horizon'. For every page (version chain):
-    //  - if the newest version is a WAL record, the page is reconstructed
-    //    and stored as a page image at the same LSN;
-    //  - if the newest version is above the horizon, the same is done for
-    //    the newest version that is not above the horizon;
-    //  - all versions older than that one are tagged with
-    //    UNUSED_VERSION_FLAG.
-    // Entries of the twophase fork whose newest version is below the horizon
-    // are tagged with UNUSED_VERSION_FLAG themselves.
-    //
-    // The loop has no normal exit: the function only returns when one of
-    // the fallible calls fails, with that error.
-    //
-    fn do_gc(&self, conf: &'static PageServerConf) -> Result<Bytes> {
-        loop {
-            thread::sleep(conf.gc_period);
-            let last_lsn = self.get_last_valid_lsn();
-
-            // checked_sub() returns None on overflow.
-            if let Some(horizon) = last_lsn.checked_sub(conf.gc_horizon) {
-                // Start from the largest possible key, so that the first
-                // seek_for_prev() lands on the last entry in the database.
-                let mut maxkey = CacheKey {
-                    tag: BufferTag {
-                        rel: RelTag {
-                            spcnode: u32::MAX,
-                            dbnode: u32::MAX,
-                            relnode: u32::MAX,
-                            forknum: u8::MAX,
-                        },
-                        blknum: u32::MAX,
-                    },
-                    lsn: Lsn::MAX,
-                };
-                let now = Instant::now();
-                let mut reconstructed = 0u64;
-                let mut truncated = 0u64;
-                let mut inspected = 0u64;
-                let mut deleted = 0u64;
-                // One iteration per page: 'maxkey' is moved below the page
-                // just handled at the end of every iteration.
-                loop {
-                    let mut iter = self.db.raw_iterator();
-                    iter.seek_for_prev(maxkey.to_bytes());
-                    if iter.valid() {
-                        // Newest version of the next page to inspect
-                        let key = CacheKey::from_slice(iter.key().unwrap());
-                        let v = iter.value().unwrap();
-
-                        inspected += 1;
-
-                        // Construct boundaries for old records cleanup
-                        maxkey.tag = key.tag;
-                        let last_lsn = key.lsn;
-                        maxkey.lsn = min(horizon, last_lsn); // do not remove last version
-
-                        let mut minkey = maxkey.clone();
-                        minkey.lsn = Lsn(0); // first version
-
-                        // Special handling of delete of PREPARE WAL record
-                        if last_lsn < horizon
-                            && key.tag.rel.forknum == pg_constants::PG_TWOPHASE_FORKNUM
-                        {
-                            if (v[0] & UNUSED_VERSION_FLAG) == 0 {
-                                let mut v = v.to_owned();
-                                v[0] |= UNUSED_VERSION_FLAG;
-                                self.db.put(key.to_bytes(), &v[..])?;
-                                deleted += 1;
-                            }
-                            maxkey = minkey;
-                            continue;
-                        }
-                        // reconstruct most recent page version
-                        if (v[0] & CONTENT_KIND_MASK) == CONTENT_WAL_RECORD {
-                            // force reconstruction of most recent page version
-                            let (base_img, records) =
-                                self.collect_records_for_apply(key.tag, key.lsn);
-
-                            trace!(
-                                "Reconstruct most recent page {} blk {} at {} from {} records",
-                                key.tag.rel,
-                                key.tag.blknum,
-                                key.lsn,
-                                records.len()
-                            );
-
-                            let new_img = self
-                                .walredo_mgr
-                                .request_redo(key.tag, key.lsn, base_img, records)?;
-                            self.put_page_image(key.tag, key.lsn, new_img.clone());
-
-                            reconstructed += 1;
-                        }
-
-                        // Reposition on the newest version of this page that
-                        // is not above min(horizon, last_lsn).
-                        iter.seek_for_prev(maxkey.to_bytes());
-                        if iter.valid() {
-                            // do not remove last version
-                            if last_lsn > horizon {
-                                // locate most recent record before horizon
-                                let key = CacheKey::from_slice(iter.key().unwrap());
-                                if key.tag == maxkey.tag {
-                                    let v = iter.value().unwrap();
-                                    if (v[0] & CONTENT_KIND_MASK) == CONTENT_WAL_RECORD {
-                                        let (base_img, records) =
-                                            self.collect_records_for_apply(key.tag, key.lsn);
-                                        trace!("Reconstruct horizon page {} blk {} at {} from {} records",
-                                              key.tag.rel, key.tag.blknum, key.lsn, records.len());
-                                        let new_img = self
-                                            .walredo_mgr
-                                            .request_redo(key.tag, key.lsn, base_img, records)?;
-                                        self.put_page_image(key.tag, key.lsn, new_img.clone());
-
-                                        truncated += 1;
-                                    } else {
-                                        trace!(
-                                            "Keeping horizon page {} blk {} at {}",
-                                            key.tag.rel,
-                                            key.tag.blknum,
-                                            key.lsn
-                                        );
-                                    }
-                                }
-                            } else {
-                                trace!(
-                                    "Last page {} blk {} at {}, horizon {}",
-                                    key.tag.rel,
-                                    key.tag.blknum,
-                                    key.lsn,
-                                    horizon
-                                );
-                            }
-                            // remove records prior to horizon
-                            // (prev() is called before the first check, so the
-                            // entry the iterator currently points at is kept;
-                            // the walk stops at the first entry that is
-                            // already flagged)
-                            loop {
-                                iter.prev();
-                                if !iter.valid() {
-                                    break;
-                                }
-                                let key = CacheKey::from_slice(iter.key().unwrap());
-                                if key.tag != maxkey.tag {
-                                    break;
-                                }
-                                let v = iter.value().unwrap();
-                                if (v[0] & UNUSED_VERSION_FLAG) == 0 {
-                                    let mut v = v.to_owned();
-                                    v[0] |= UNUSED_VERSION_FLAG;
-                                    self.db.put(key.to_bytes(), &v[..])?;
-                                    deleted += 1;
-                                    trace!(
-                                        "deleted: {} blk {} at {}",
-                                        key.tag.rel,
-                                        key.tag.blknum,
-                                        key.lsn
-                                    );
-                                } else {
-                                    break;
-                                }
-                            }
-                        }
-                        // Continue with whatever precedes the first possible
-                        // version of this page.
-                        maxkey = minkey;
-                    } else {
-                        break;
-                    }
-                }
-                info!("Garbage collection completed in {:?}:\n{} version chains inspected, {} pages reconstructed, {} version histories truncated, {} versions deleted",
-					  now.elapsed(), inspected, reconstructed, truncated, deleted);
-            }
-        }
-    }
-
-    //
-    // Block until WAL has been received up to the given LSN, or fail once
-    // TIMEOUT has passed. Returns the LSN that was actually waited for.
-    //
-    fn wait_lsn(&self, mut lsn: Lsn) -> Result<Lsn> {
-        // Lsn(0) is the invalid LSN. It stands for "don't wait, return latest
-        // version of the page", which is necessary for bootstrap; it is
-        // replaced by the current last valid LSN.
-        if lsn == Lsn(0) {
-            let latest = self.last_valid_lsn.load();
-            trace!(
-                "walreceiver doesn't work yet last_valid_lsn {}, requested {}",
-                latest,
-                lsn
-            );
-            lsn = latest;
-        }
-
-        let waited = self.last_valid_lsn.wait_for_timeout(lsn, TIMEOUT);
-        waited.with_context(|| {
-            format!(
-                "Timed out while waiting for WAL record at LSN {} to arrive",
-                lsn
-            )
-        })?;
-
-        Ok(lsn)
-    }
-}
-
-impl Timeline for RocksTimeline {
-    // Public GET interface functions
-
-    ///
-    /// GetPage@LSN
-    ///
-    /// Waits until WAL has been received up to 'req_lsn', then returns the
-    /// newest version of page 'tag' found at or before that LSN. A stored
-    /// page image is returned as is. If the newest version is a WAL record,
-    /// the page is reconstructed through the WAL redo manager and the result
-    /// is also stored as a page image. When no entry for the page exists, an
-    /// all-zeros 8k page is returned.
-    ///
-    /// Fails if waiting for the LSN times out, if WAL redo fails, or if the
-    /// newest entry for the page is a truncation.
-    ///
-    fn get_page_at_lsn(&self, tag: BufferTag, req_lsn: Lsn) -> Result<Bytes> {
-        self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
-
-        let lsn = self.wait_lsn(req_lsn)?;
-
-        // Look up cache entry. If it's a page image, return that. If it's a WAL record,
-        // ask the WAL redo service to reconstruct the page image from the WAL records.
-        let key = CacheKey { tag, lsn };
-
-        let mut iter = self.db.raw_iterator();
-        iter.seek_for_prev(key.to_bytes());
-
-        if iter.valid() {
-            let key = CacheKey::from_slice(iter.key().unwrap());
-            if key.tag == tag {
-                let content = CacheEntryContent::from_slice(iter.value().unwrap());
-                let page_img: Bytes = match content {
-                    CacheEntryContent::PageImage(img) => img,
-                    CacheEntryContent::WALRecord(_rec) => {
-                        // Request the WAL redo manager to apply the WAL records for us.
-                        let (base_img, records) = self.collect_records_for_apply(tag, lsn);
-                        let img = self.walredo_mgr.request_redo(tag, lsn, base_img, records)?;
-
-                        self.put_page_image(tag, lsn, img.clone());
-                        img
-                    }
-                    CacheEntryContent::Truncation => {
-                        // No base image, and no WAL record. Huh?
-                        bail!("no page image or WAL record for requested page");
-                    }
-                };
-                // The page LSN is only decoded for the debugging log, so a
-                // stored image that is too short to carry one (put_page_image
-                // accepts zero-length images) must not bring the request down.
-                // FIXME: assumes the page was written in little-endian byte order.
-                if let (Some(hi), Some(lo)) = (page_img.get(0..4), page_img.get(4..8)) {
-                    let page_lsn_hi = u32::from_le_bytes(hi.try_into().unwrap());
-                    let page_lsn_lo = u32::from_le_bytes(lo.try_into().unwrap());
-                    debug!(
-                        "Returning page with LSN {:X}/{:X} for {} blk {}",
-                        page_lsn_hi, page_lsn_lo, tag.rel, tag.blknum
-                    );
-                } else {
-                    debug!(
-                        "Returning {}-byte page image for {} blk {}",
-                        page_img.len(),
-                        tag.rel,
-                        tag.blknum
-                    );
-                }
-                return Ok(page_img);
-            }
-        }
-        static ZERO_PAGE: [u8; 8192] = [0u8; 8192];
-        debug!(
-            "Page {} blk {} at {}({}) not found",
-            tag.rel, tag.blknum, req_lsn, lsn
-        );
-        Ok(Bytes::from_static(&ZERO_PAGE))
-    }
-
-    ///
-    /// Get size of relation at given LSN, after waiting for WAL to be
-    /// received up to that LSN.
-    ///
-    fn get_relsize(&self, rel: RelTag, lsn: Lsn) -> Result<u32> {
-        let waited_lsn = self.wait_lsn(lsn)?;
-        self.relsize_get_nowait(rel, waited_lsn)
-    }
-
-    /// Get vector of prepared twophase transactions.
-    ///
-    /// Scans the entries of the twophase fork with an LSN not above 'lsn'.
-    /// The block number of such an entry is the transaction ID; it is
-    /// reported when the CLOG page at 'lsn' shows it as still in progress.
-    fn get_twophase(&self, lsn: Lsn) -> Result<Vec<TransactionId>> {
-        // Key to start the scan of the twophase fork from
-        let start = CacheKey {
-            tag: BufferTag {
-                rel: RelTag {
-                    forknum: pg_constants::PG_TWOPHASE_FORKNUM,
-                    spcnode: 0,
-                    dbnode: 0,
-                    relnode: 0,
-                },
-                blknum: 0,
-            },
-            lsn: Lsn(0),
-        };
-        let mut iter = self.db.raw_iterator();
-        iter.seek(start.to_bytes());
-
-        let mut gxacts = Vec::new();
-        while iter.valid() {
-            let entry = CacheKey::from_slice(iter.key().unwrap());
-            if entry.tag.rel.forknum != pg_constants::PG_TWOPHASE_FORKNUM {
-                break; // we are done with this fork
-            }
-            if entry.lsn <= lsn {
-                let xid = entry.tag.blknum;
-                // CLOG page that holds the status of this transaction
-                let clog_tag = BufferTag {
-                    rel: RelTag {
-                        forknum: pg_constants::PG_XACT_FORKNUM,
-                        spcnode: 0,
-                        dbnode: 0,
-                        relnode: 0,
-                    },
-                    blknum: xid / pg_constants::CLOG_XACTS_PER_PAGE,
-                };
-                let clog_page = self.get_page_at_lsn(clog_tag, lsn)?;
-                let in_progress = transaction_id_get_status(xid, &clog_page[..])
-                    == pg_constants::TRANSACTION_STATUS_IN_PROGRESS;
-                if in_progress {
-                    gxacts.push(xid);
-                }
-            }
-            iter.next();
-        }
-        Ok(gxacts)
-    }
-
-    /// Get databases. This function is used to locate pg_filenode.map files.
-    ///
-    /// Scans the entries of the filenode map fork and collects each relation
-    /// tag once, looking only at entries with an LSN not above 'lsn'.
-    fn get_databases(&self, lsn: Lsn) -> Result<Vec<RelTag>> {
-        // Key to start the scan of the filenode map fork from
-        let start = CacheKey {
-            tag: BufferTag {
-                rel: RelTag {
-                    forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
-                    spcnode: 0,
-                    dbnode: 0,
-                    relnode: 0,
-                },
-                blknum: 0,
-            },
-            lsn: Lsn(0),
-        };
-        let mut iter = self.db.raw_iterator();
-        iter.seek(start.to_bytes());
-
-        let mut dbs = Vec::new();
-        // Tag collected last; consecutive entries with the same tag are skipped.
-        let mut prev_tag = start.tag.rel;
-        while iter.valid() {
-            let entry = CacheKey::from_slice(iter.key().unwrap());
-            let rel = entry.tag.rel;
-            if rel.forknum != pg_constants::PG_FILENODEMAP_FORKNUM {
-                break; // we are done with this fork
-            }
-            if rel != prev_tag && entry.lsn <= lsn {
-                prev_tag = rel;
-                dbs.push(rel); // collect unique tags
-            }
-            iter.next();
-        }
-        Ok(dbs)
-    }
-
-    /// Get range [begin,end) of stored blocks. Used mostly for SMGR pseudorelations
-    /// but can be also applied to normal relations.
-    ///
-    /// Waits for WAL up to 'lsn' first; the lookups themselves do not use
-    /// the LSN. Returns (0, 0) when nothing is stored for the relation.
-    fn get_range(&self, rel: RelTag, lsn: Lsn) -> Result<(u32, u32)> {
-        let _lsn = self.wait_lsn(lsn)?;
-
-        // Start from the minimal key of the relation.
-        let mut probe = CacheKey {
-            tag: BufferTag { rel, blknum: 0 },
-            lsn: Lsn(0),
-        };
-
-        // Locate the first entry.
-        let mut iter = self.db.raw_iterator();
-        iter.seek(probe.to_bytes());
-        if !iter.valid() {
-            return Ok((0, 0)); // empty range
-        }
-        let first = CacheKey::from_slice(iter.key().unwrap()).tag;
-        if first.rel != rel {
-            // Already past this relation
-            return Ok((0, 0)); // empty range
-        }
-
-        // Locate the last entry, searching backwards from the maximal block number.
-        probe.tag.blknum = u32::MAX;
-        let mut iter = self.db.raw_iterator();
-        iter.seek_for_prev(probe.to_bytes());
-        if !iter.valid() {
-            return Ok((0, 0)); // empty range
-        }
-        let last_blknum = CacheKey::from_slice(iter.key().unwrap()).tag.blknum;
-
-        // upper boundary is exclusive
-        Ok((first.blknum, last_blknum + 1))
-    }
-
-    ///
-    /// Does relation exist at given LSN?
-    ///
-    /// Waits for WAL up to 'req_lsn', then checks whether any entry of the
-    /// relation sorts at or below the key (rel, u32::MAX, lsn).
-    ///
-    /// FIXME: this actually returns true, if the relation exists at *any* LSN
-    fn get_relsize_exists(&self, rel: RelTag, req_lsn: Lsn) -> Result<bool> {
-        let lsn = self.wait_lsn(req_lsn)?;
-
-        let probe = CacheKey {
-            tag: BufferTag {
-                rel,
-                blknum: u32::MAX,
-            },
-            lsn,
-        };
-        let mut iter = self.db.raw_iterator();
-        iter.seek_for_prev(probe.to_bytes());
-
-        let exists = iter.valid() && CacheKey::from_slice(iter.key().unwrap()).tag.rel == rel;
-        if exists {
-            debug!("Relation {} exists at {}", rel, lsn);
-        } else {
-            debug!("Relation {} doesn't exist at {}", rel, lsn);
-        }
-        Ok(exists)
-    }
-
-    // Other public functions, for updating the repository.
-    // These are used by the WAL receiver and WAL redo.
-
-    ///
-    /// Adds a WAL record to the repository, keyed by the page 'tag' and the
-    /// LSN of the record.
-    ///
-    /// The function cannot return an error, so a failed write is reported
-    /// in the log; the entry counters are only advanced when the record was
-    /// actually stored.
-    ///
-    fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
-        let lsn = rec.lsn;
-        let key = CacheKey { tag, lsn };
-
-        let content = CacheEntryContent::WALRecord(rec);
-
-        if let Err(e) = self.db.put(key.to_bytes(), content.to_bytes()) {
-            error!(
-                "put_wal_record rel {} blk {} at {} failed: {}",
-                tag.rel, tag.blknum, lsn, e
-            );
-            return;
-        }
-        trace!(
-            "put_wal_record rel {} blk {} at {}",
-            tag.rel,
-            tag.blknum,
-            lsn
-        );
-
-        self.num_entries.fetch_add(1, Ordering::Relaxed);
-        self.num_wal_records.fetch_add(1, Ordering::Relaxed);
-    }
-
-    ///
-    /// Adds a relation-wide WAL record (like truncate) to the repository,
-    /// associating it with all pages started with specified block number.
-    ///
-    /// A truncation entry is written at 'lsn' for every block from 'nblocks'
-    /// up to the size the relation has at the last valid LSN. Nothing is
-    /// written when the relation is already that short. Fails if the
-    /// relation size cannot be determined or an entry cannot be written.
-    ///
-    fn put_truncation(&self, rel: RelTag, lsn: Lsn, nblocks: u32) -> Result<()> {
-        // What was the size of the relation before this record?
-        let last_lsn = self.last_valid_lsn.load();
-        let old_rel_size = self.relsize_get_nowait(rel, last_lsn)?;
-
-        // set new relation size
-        trace!("Truncate relation {} to {} blocks at {}", rel, nblocks, lsn);
-
-        // Every truncation entry has the same value; serialize it only once.
-        let val = CacheEntryContent::Truncation.to_bytes();
-        for blknum in nblocks..old_rel_size {
-            let key = CacheKey {
-                tag: BufferTag { rel, blknum },
-                lsn,
-            };
-            trace!("put_truncation rel {} blk {} at {}", rel, blknum, lsn);
-            self.db.put(key.to_bytes(), &val[..])?;
-        }
-        // Number of entries written above. A plain subtraction would
-        // underflow when 'nblocks' is larger than the old relation size.
-        let n = old_rel_size.saturating_sub(nblocks) as u64;
-        self.num_entries.fetch_add(n, Ordering::Relaxed);
-        self.num_wal_records.fetch_add(n, Ordering::Relaxed);
-        Ok(())
-    }
-
-    ///
-    /// Get page image at particular LSN.
-    ///
-    /// Only an entry stored under exactly (tag, lsn) is considered. Returns
-    /// None if there is no such entry or if it is not a page image.
-    ///
-    fn get_page_image(&self, tag: BufferTag, lsn: Lsn) -> Result<Option<Bytes>> {
-        let stored = self.db.get(CacheKey { tag, lsn }.to_bytes())?;
-        let image = match stored.map(|bytes| CacheEntryContent::from_slice(&bytes)) {
-            Some(CacheEntryContent::PageImage(img)) => Some(img),
-            _ => None,
-        };
-        Ok(image)
-    }
-
-    ///
-    /// Memorize a full image of a page version.
-    ///
-    /// Zero size of page image indicates that page can be removed: such an
-    /// entry is written with UNUSED_VERSION_FLAG set in its first byte.
-    ///
-    /// The function cannot return an error, so a failed write is reported
-    /// in the log; the page image counter is only advanced when the image
-    /// was actually stored.
-    ///
-    fn put_page_image(&self, tag: BufferTag, lsn: Lsn, img: Bytes) {
-        let img_len = img.len();
-        let key = CacheKey { tag, lsn };
-        let content = CacheEntryContent::PageImage(img);
-
-        let mut val_buf = content.to_bytes();
-
-        if img_len == 0 {
-            // delete truncated multixact page
-            // (a freshly serialized value never has the flag set yet, so it
-            // can be set unconditionally)
-            val_buf[0] |= UNUSED_VERSION_FLAG;
-        }
-
-        // Write the buffer that carries the flag. Serializing 'content' a
-        // second time here would silently drop it.
-        if let Err(e) = self.db.put(key.to_bytes(), &val_buf[..]) {
-            error!(
-                "put_page_image rel {} blk {} at {} failed: {}",
-                tag.rel, tag.blknum, lsn, e
-            );
-            return;
-        }
-
-        trace!(
-            "put_page_image rel {} blk {} at {}",
-            tag.rel,
-            tag.blknum,
-            lsn
-        );
-        self.num_page_images.fetch_add(1, Ordering::Relaxed);
-    }
-
-    fn put_create_database(
-        &self,
-        lsn: Lsn,
-        db_id: Oid,
-        tablespace_id: Oid,
-        src_db_id: Oid,
-        src_tablespace_id: Oid,
-    ) -> Result<()> {
-        let mut n = 0;
-        for forknum in &[
-            pg_constants::MAIN_FORKNUM,
-            pg_constants::FSM_FORKNUM,
-            pg_constants::VISIBILITYMAP_FORKNUM,
-            pg_constants::INIT_FORKNUM,
-            pg_constants::PG_FILENODEMAP_FORKNUM,
-        ] {
-            let key = CacheKey {
-                tag: BufferTag {
-                    rel: RelTag {
-                        spcnode: src_tablespace_id,
-                        dbnode: src_db_id,
-                        relnode: 0,
-                        forknum: *forknum,
-                    },
-                    blknum: 0,
-                },
-                lsn: Lsn(0),
-            };
-            let mut iter = self.db.raw_iterator();
-            iter.seek(key.to_bytes());
-            while iter.valid() {
-                let mut key = CacheKey::from_slice(iter.key().unwrap());
-                if key.tag.rel.spcnode != src_tablespace_id || key.tag.rel.dbnode != src_db_id {
-                    break;
-                }
-                key.tag.rel.spcnode = tablespace_id;
-                key.tag.rel.dbnode = db_id;
-                key.lsn = lsn;
-
-                let v = iter.value().unwrap();
-                self.db.put(key.to_bytes(), v)?;
-                n += 1;
-                iter.next();
-            }
-        }
-        info!(
-            "Create database {}/{}, copy {} entries",
-            tablespace_id, db_id, n
-        );
-        Ok(())
-    }
-
-    /// Remember that WAL has been received and added to the timeline up to the given LSN
-    fn advance_last_valid_lsn(&self, lsn: Lsn) {
-        let lsn = Lsn((lsn.0 + 7) & !7); // align position on 8 bytes
-        let old = self.last_valid_lsn.advance(lsn);
-
-        // Can't move backwards.
-        if lsn < old {
-            warn!(
-                "attempted to move last valid LSN backwards (was {}, new {})",
-                old, lsn
-            );
-        }
-    }
-
-    ///
-    /// Remember the (end of) last valid WAL record remembered for the timeline.
-    ///
-    /// NOTE: this updates last_valid_lsn as well.
-    ///
-    fn advance_last_record_lsn(&self, lsn: Lsn) {
-        let lsn = Lsn((lsn.0 + 7) & !7); // align position on 8 bytes
-                                         // Can't move backwards.
-        let old = self.last_record_lsn.fetch_max(lsn);
-        assert!(old <= lsn);
-
-        // Also advance last_valid_lsn
-        let old = self.last_valid_lsn.advance(lsn);
-        // Can't move backwards.
-        if lsn < old {
-            warn!(
-                "attempted to move last record LSN backwards (was {}, new {})",
-                old, lsn
-            );
-        }
-    }
-
-    fn get_last_record_lsn(&self) -> Lsn {
-        self.last_record_lsn.load()
-    }
-
-    fn init_valid_lsn(&self, lsn: Lsn) {
-        let old = self.last_valid_lsn.advance(lsn);
-        assert!(old == Lsn(0));
-        let old = self.last_record_lsn.fetch_max(lsn);
-        assert!(old == Lsn(0));
-    }
-
-    fn get_last_valid_lsn(&self) -> Lsn {
-        self.last_valid_lsn.load()
-    }
-
-    //
-    // Get statistics to be displayed in the user interface.
-    //
-    // FIXME
-    /*
-    fn get_stats(&self) -> TimelineStats {
-        TimelineStats {
-            num_entries: self.num_entries.load(Ordering::Relaxed),
-            num_page_images: self.num_page_images.load(Ordering::Relaxed),
-            num_wal_records: self.num_wal_records.load(Ordering::Relaxed),
-            num_getpage_requests: self.num_getpage_requests.load(Ordering::Relaxed),
-        }
-    }
-    */
-}
--- a/pageserver/src/restore_local_repo.rs
+++ b/pageserver/src/restore_local_repo.rs
@@ -1,483 +0,0 @@
-//
-// Restore chunks from local Zenith repository
-//
-// This runs once at Page Server startup. It loads all the "snapshots" and all
-// WAL from all timelines from the local zenith repository into the in-memory page
-// cache.
-//
-// This also initializes the "last valid LSN" in the page cache to the last LSN
-// seen in the WAL, so that when the WAL receiver is started, it starts
-// streaming from that LSN.
-//
-
-use log::*;
-use std::cmp::max;
-use std::fs;
-use std::fs::File;
-use std::io::Read;
-use std::io::Seek;
-use std::io::SeekFrom;
-use std::path::{Path, PathBuf};
-
-use anyhow::Result;
-use bytes::Bytes;
-
-use crate::repository::{BufferTag, RelTag, Timeline};
-use crate::waldecoder::{decode_wal_record, Oid, WalStreamDecoder};
-use crate::PageServerConf;
-use crate::ZTimelineId;
-use postgres_ffi::relfile_utils::*;
-use postgres_ffi::xlog_utils::*;
-use postgres_ffi::*;
-use zenith_utils::lsn::Lsn;
-
-///
-/// Load all WAL and all relation data pages from local disk into the repository.
-///
-pub fn restore_timeline(
-    conf: &PageServerConf,
-    timeline: &dyn Timeline,
-    timelineid: ZTimelineId,
-) -> Result<()> {
-    let timelinepath = PathBuf::from("timelines").join(timelineid.to_string());
-
-    if !timelinepath.exists() {
-        anyhow::bail!("timeline {} does not exist in the page server's repository");
-    }
-
-    // Scan .zenith/timelines/<timeline>/snapshots
-    let snapshotspath = PathBuf::from("timelines")
-        .join(timelineid.to_string())
-        .join("snapshots");
-
-    let mut last_snapshot_lsn: Lsn = Lsn(0);
-
-    for direntry in fs::read_dir(&snapshotspath).unwrap() {
-        let direntry = direntry?;
-        let filename = direntry.file_name();
-        let lsn = Lsn::from_filename(&filename)?;
-        last_snapshot_lsn = max(lsn, last_snapshot_lsn);
-
-        // FIXME: pass filename as Path instead of str?
-        let filename_str = filename.into_string().unwrap();
-        restore_snapshot(conf, timeline, timelineid, &filename_str)?;
-        info!("restored snapshot at {:?}", filename_str);
-    }
-
-    if last_snapshot_lsn == Lsn(0) {
-        error!(
-            "could not find valid snapshot in {}",
-            snapshotspath.display()
-        );
-        // TODO return error?
-    }
-    timeline.init_valid_lsn(last_snapshot_lsn);
-
-    restore_wal(timeline, timelineid, last_snapshot_lsn)?;
-
-    Ok(())
-}
-
-///
-/// Find latest snapshot in a timeline's 'snapshots' directory
-///
-pub fn find_latest_snapshot(_conf: &PageServerConf, timeline: ZTimelineId) -> Result<Lsn> {
-    let snapshotspath = format!("timelines/{}/snapshots", timeline);
-
-    let mut last_snapshot_lsn = Lsn(0);
-    for direntry in fs::read_dir(&snapshotspath).unwrap() {
-        let filename = direntry.unwrap().file_name();
-
-        if let Ok(lsn) = Lsn::from_filename(&filename) {
-            last_snapshot_lsn = max(lsn, last_snapshot_lsn);
-        } else {
-            error!("unrecognized file in snapshots directory: {:?}", filename);
-        }
-    }
-
-    if last_snapshot_lsn == Lsn(0) {
-        error!("could not find valid snapshot in {}", &snapshotspath);
-        // TODO return error?
-    }
-    Ok(last_snapshot_lsn)
-}
-
-fn restore_snapshot(
-    conf: &PageServerConf,
-    timeline: &dyn Timeline,
-    timelineid: ZTimelineId,
-    snapshot: &str,
-) -> Result<()> {
-    let snapshotpath = PathBuf::from("timelines")
-        .join(timelineid.to_string())
-        .join("snapshots")
-        .join(snapshot);
-
-    // Scan 'global'
-    for direntry in fs::read_dir(snapshotpath.join("global"))? {
-        let direntry = direntry?;
-        match direntry.file_name().to_str() {
-            None => continue,
-
-            // These special files appear in the snapshot, but are not needed by the page server
-            Some("pg_control") => restore_nonrel_file(
-                conf,
-                timeline,
-                timelineid,
-                "0",
-                0,
-                0,
-                pg_constants::PG_CONTROLFILE_FORKNUM,
-                0,
-                &direntry.path(),
-            )?,
-            Some("pg_filenode.map") => restore_nonrel_file(
-                conf,
-                timeline,
-                timelineid,
-                snapshot,
-                pg_constants::GLOBALTABLESPACE_OID,
-                0,
-                pg_constants::PG_FILENODEMAP_FORKNUM,
-                0,
-                &direntry.path(),
-            )?,
-
-            // Load any relation files into the page server
-            _ => restore_relfile(
-                timeline,
-                snapshot,
-                pg_constants::GLOBALTABLESPACE_OID,
-                0,
-                &direntry.path(),
-            )?,
-        }
-    }
-
-    // Scan 'base'. It contains database dirs, the database OID is the filename.
-    // E.g. 'base/12345', where 12345 is the database OID.
-    for direntry in fs::read_dir(snapshotpath.join("base"))? {
-        let direntry = direntry?;
-
-        let dboid = direntry.file_name().to_str().unwrap().parse::<u32>()?;
-
-        for direntry in fs::read_dir(direntry.path())? {
-            let direntry = direntry?;
-            match direntry.file_name().to_str() {
-                None => continue,
-
-                // These special files appear in the snapshot, but are not needed by the page server
-                Some("PG_VERSION") => continue,
-                Some("pg_filenode.map") => restore_nonrel_file(
-                    conf,
-                    timeline,
-                    timelineid,
-                    snapshot,
-                    pg_constants::DEFAULTTABLESPACE_OID,
-                    dboid,
-                    pg_constants::PG_FILENODEMAP_FORKNUM,
-                    0,
-                    &direntry.path(),
-                )?,
-
-                // Load any relation files into the page server
-                _ => restore_relfile(
-                    timeline,
-                    snapshot,
-                    pg_constants::DEFAULTTABLESPACE_OID,
-                    dboid,
-                    &direntry.path(),
-                )?,
-            }
-        }
-    }
-    for entry in fs::read_dir(snapshotpath.join("pg_xact"))? {
-        let entry = entry?;
-        restore_slru_file(
-            conf,
-            timeline,
-            timelineid,
-            snapshot,
-            pg_constants::PG_XACT_FORKNUM,
-            &entry.path(),
-        )?;
-    }
-    for entry in fs::read_dir(snapshotpath.join("pg_multixact").join("members"))? {
-        let entry = entry?;
-        restore_slru_file(
-            conf,
-            timeline,
-            timelineid,
-            snapshot,
-            pg_constants::PG_MXACT_MEMBERS_FORKNUM,
-            &entry.path(),
-        )?;
-    }
-    for entry in fs::read_dir(snapshotpath.join("pg_multixact").join("offsets"))? {
-        let entry = entry?;
-        restore_slru_file(
-            conf,
-            timeline,
-            timelineid,
-            snapshot,
-            pg_constants::PG_MXACT_OFFSETS_FORKNUM,
-            &entry.path(),
-        )?;
-    }
-    for entry in fs::read_dir(snapshotpath.join("pg_twophase"))? {
-        let entry = entry?;
-        let xid = u32::from_str_radix(&entry.path().to_str().unwrap(), 16)?;
-        restore_nonrel_file(
-            conf,
-            timeline,
-            timelineid,
-            snapshot,
-            0,
-            0,
-            pg_constants::PG_TWOPHASE_FORKNUM,
-            xid,
-            &entry.path(),
-        )?;
-    }
-    // TODO: Scan pg_tblspc
-
-    Ok(())
-}
-
-fn restore_relfile(
-    timeline: &dyn Timeline,
-    snapshot: &str,
-    spcoid: Oid,
-    dboid: Oid,
-    path: &Path,
-) -> Result<()> {
-    let lsn = Lsn::from_hex(snapshot)?;
-
-    // Does it look like a relation file?
-
-    let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
-    if let Err(e) = p {
-        warn!("unrecognized file in snapshot: {:?} ({})", path, e);
-        return Err(e.into());
-    }
-    let (relnode, forknum, segno) = p.unwrap();
-
-    let mut file = File::open(path)?;
-    let mut buf: [u8; 8192] = [0u8; 8192];
-
-    let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
-    loop {
-        let r = file.read_exact(&mut buf);
-        match r {
-            Ok(_) => {
-                let tag = BufferTag {
-                    rel: RelTag {
-                        spcnode: spcoid,
-                        dbnode: dboid,
-                        relnode,
-                        forknum,
-                    },
-                    blknum,
-                };
-                timeline.put_page_image(tag, lsn, Bytes::copy_from_slice(&buf));
-                /*
-                if oldest_lsn == 0 || p.lsn < oldest_lsn {
-                    oldest_lsn = p.lsn;
-                }
-                 */
-            }
-
-            // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
-                std::io::ErrorKind::UnexpectedEof => {
-                    // reached EOF. That's expected.
-                    // FIXME: maybe check that we read the full length of the file?
-                    break;
-                }
-                _ => {
-                    error!("error reading file: {:?} ({})", path, e);
-                    break;
-                }
-            },
-        };
-        blknum += 1;
-    }
-
-    Ok(())
-}
-
-fn restore_nonrel_file(
-    _conf: &PageServerConf,
-    timeline: &dyn Timeline,
-    _timelineid: ZTimelineId,
-    snapshot: &str,
-    spcoid: Oid,
-    dboid: Oid,
-    forknum: u8,
-    blknum: u32,
-    path: &Path,
-) -> Result<()> {
-    let lsn = Lsn::from_hex(snapshot)?;
-
-    // Does it look like a relation file?
-
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    let tag = BufferTag {
-        rel: RelTag {
-            spcnode: spcoid,
-            dbnode: dboid,
-            relnode: 0,
-            forknum,
-        },
-        blknum,
-    };
-    timeline.put_page_image(tag, lsn, Bytes::copy_from_slice(&buffer[..]));
-    Ok(())
-}
-
-fn restore_slru_file(
-    _conf: &PageServerConf,
-    timeline: &dyn Timeline,
-    _timelineid: ZTimelineId,
-    snapshot: &str,
-    forknum: u8,
-    path: &Path,
-) -> Result<()> {
-    let lsn = Lsn::from_hex(snapshot)?;
-
-    // Does it look like a relation file?
-
-    let mut file = File::open(path)?;
-    let mut buf: [u8; 8192] = [0u8; 8192];
-    let segno = u32::from_str_radix(path.file_name().unwrap().to_str().unwrap(), 16)?;
-
-    let mut blknum: u32 = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
-    loop {
-        let r = file.read_exact(&mut buf);
-        match r {
-            Ok(_) => {
-                let tag = BufferTag {
-                    rel: RelTag {
-                        spcnode: 0,
-                        dbnode: 0,
-                        relnode: 0,
-                        forknum,
-                    },
-                    blknum,
-                };
-                timeline.put_page_image(tag, lsn, Bytes::copy_from_slice(&buf));
-                /*
-                if oldest_lsn == 0 || p.lsn < oldest_lsn {
-                    oldest_lsn = p.lsn;
-                }
-                 */
-            }
-
-            // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
-                std::io::ErrorKind::UnexpectedEof => {
-                    // reached EOF. That's expected.
-                    // FIXME: maybe check that we read the full length of the file?
-                    break;
-                }
-                _ => {
-                    error!("error reading file: {:?} ({})", path, e);
-                    break;
-                }
-            },
-        };
-        blknum += 1;
-    }
-
-    Ok(())
-}
-
-// Scan WAL on a timeline, starting from given LSN, and load all the records
-// into the page cache.
-fn restore_wal(timeline: &dyn Timeline, timelineid: ZTimelineId, startpoint: Lsn) -> Result<()> {
-    let walpath = format!("timelines/{}/wal", timelineid);
-
-    let mut waldecoder = WalStreamDecoder::new(startpoint);
-
-    let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE);
-    let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
-    let mut last_lsn = Lsn(0);
-
-    let mut checkpoint = CheckPoint::new(startpoint.0, 1);
-    let checkpoint_tag = BufferTag::fork(pg_constants::PG_CHECKPOINT_FORKNUM);
-    let pg_control_tag = BufferTag::fork(pg_constants::PG_CONTROLFILE_FORKNUM);
-    if let Some(pg_control_bytes) = timeline.get_page_image(pg_control_tag, Lsn(0))? {
-        let pg_control = decode_pg_control(pg_control_bytes)?;
-        checkpoint = pg_control.checkPointCopy.clone();
-    } else {
-        error!("No control file is found in reposistory");
-    }
-
-    loop {
-        // FIXME: assume postgresql tli 1 for now
-        let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
-        let mut path = walpath.clone() + "/" + &filename;
-
-        // It could be as .partial
-        if !PathBuf::from(&path).exists() {
-            path += ".partial";
-        }
-
-        // Slurp the WAL file
-        let open_result = File::open(&path);
-        if let Err(e) = &open_result {
-            if e.kind() == std::io::ErrorKind::NotFound {
-                break;
-            }
-        }
-        let mut file = open_result?;
-
-        if offset > 0 {
-            file.seek(SeekFrom::Start(offset as u64))?;
-        }
-
-        let mut buf = Vec::new();
-        let nread = file.read_to_end(&mut buf)?;
-        if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize {
-            // Maybe allow this for .partial files?
-            error!("read only {} bytes from WAL file", nread);
-        }
-        waldecoder.feed_bytes(&buf);
-
-        let mut nrecords = 0;
-        loop {
-            let rec = waldecoder.poll_decode();
-            if rec.is_err() {
-                // Assume that an error means we've reached the end of
-                // a partial WAL record. So that's ok.
-                trace!("WAL decoder error {:?}", rec);
-                waldecoder.set_position(Lsn((segno + 1) * pg_constants::WAL_SEGMENT_SIZE as u64));
-                break;
-            }
-            if let Some((lsn, recdata)) = rec.unwrap() {
-                let decoded = decode_wal_record(&mut checkpoint, recdata.clone());
-                timeline.save_decoded_record(decoded, recdata, lsn)?;
-                last_lsn = lsn;
-            } else {
-                break;
-            }
-            nrecords += 1;
-        }
-
-        info!(
-            "restored {} records from WAL file {} at {}",
-            nrecords, filename, last_lsn
-        );
-
-        segno += 1;
-        offset = 0;
-    }
-    info!("reached end of WAL at {}", last_lsn);
-    let checkpoint_bytes = encode_checkpoint(checkpoint);
-    timeline.put_page_image(checkpoint_tag, Lsn(0), checkpoint_bytes);
-    Ok(())
-}
--- a/pageserver/src/restore_s3.rs
+++ b/pageserver/src/restore_s3.rs
@@ -22,9 +22,7 @@ use tokio::runtime;

 use futures::future;

-use crate::{page_cache, PageServerConf};
-use postgres_ffi::pg_constants;
-use postgres_ffi::relfile_utils::*;
+use crate::{controlfile, page_cache, pg_constants, PageServerConf};

 struct Storage {
    region: Region,
@@ -40,9 +38,12 @@ pub fn restore_main(conf: &PageServerConf) {
        let result = restore_chunk(conf).await;

        match result {
-            Ok(_) => {}
+            Ok(_) => {
+                return;
+            }
            Err(err) => {
                error!("S3 error: {}", err);
+                return;
            }
        }
    });
@@ -59,8 +60,8 @@ pub fn restore_main(conf: &PageServerConf) {
 async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
    let backend = Storage {
        region: Region::Custom {
-            region: env::var("S3_REGION").unwrap(),
-            endpoint: env::var("S3_ENDPOINT").unwrap(),
+            region: env::var("S3_REGION").unwrap().into(),
+            endpoint: env::var("S3_ENDPOINT").unwrap().into(),
        },
        credentials: Credentials::new(
            Some(&env::var("S3_ACCESSKEY").unwrap()),
@@ -83,8 +84,24 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
        .list("relationdata/".to_string(), Some("".to_string()))
        .await?;

-    // TODO: get that from backup
-    let sys_id: u64 = 42;
+    //Before uploading other files, slurp pg_control to set systemid
+
+    let control_results: Vec<s3::serde_types::ListBucketResult> = bucket
+        .list(
+            "relationdata/global/pg_control".to_string(),
+            Some("".to_string()),
+        )
+        .await?;
+    let object = &(&control_results[0]).contents[0];
+    let (data, _) = bucket.get_object(&object.key).await.unwrap();
+    let bytes = BytesMut::from(data.as_slice()).freeze();
+    let c = controlfile::decode_pg_control(bytes);
+
+    let pcache = page_cache::get_pagecache(conf.clone(), c.system_identifier);
+    pcache.set_controldata(c.clone());
+    trace!("uploaded controlfile {:?}", pcache.get_controldata());
+
+    let sys_id: u64 = c.system_identifier;
    let mut oldest_lsn = 0;
    let mut slurp_futures: Vec<_> = Vec::new();

@@ -118,35 +135,114 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
        panic!("no base backup found");
    }

-    let pcache = page_cache::get_pagecache(conf, sys_id);
+    //Now add nonrelation files
+    let nonrelresults: Vec<s3::serde_types::ListBucketResult> = bucket
+        .list("nonreldata/".to_string(), Some("".to_string()))
+        .await?;
+    for result in nonrelresults {
+        for object in result.contents {
+            // Download needed non relation files, slurping them into memory
+
+            let key = object.key;
+            let relpath = key.strip_prefix("nonreldata/").unwrap();
+            trace!("list nonrelfiles {}", relpath);
+
+            let parsed = parse_nonrel_file_path(&relpath);
+
+            match parsed {
+                Ok(p) => {
+                    let b = bucket.clone();
+                    let f = slurp_base_file(conf, sys_id, b, key.to_string(), p);
+
+                    slurp_futures.push(f);
+                }
+                Err(e) => {
+                    warn!("unrecognized file: {} ({})", relpath, e);
+                }
+            };
+        }
+    }
+
    pcache.init_valid_lsn(oldest_lsn);

    info!("{} files to restore...", slurp_futures.len());

    future::join_all(slurp_futures).await;
-    info!("restored!");
+    info!(
+        "restored! {:?} to {:?}",
+        pcache.first_valid_lsn, pcache.last_valid_lsn
+    );

    Ok(())
 }

+#[derive(Debug)]
+struct FilePathError {
+    msg: String, // description of why the file path or name was rejected
+}
+
+impl FilePathError {
+    /// Builds an error that carries an owned copy of `msg` as its description.
+    fn new(msg: &str) -> FilePathError {
+        let msg = String::from(msg);
+        FilePathError { msg }
+    }
+}
+
+impl From<core::num::ParseIntError> for FilePathError {
+    /// Lets `?` convert a failed integer parse into a `FilePathError`.
+    fn from(e: core::num::ParseIntError) -> Self {
+        let msg = format!("invalid filename: {}", e);
+        FilePathError { msg }
+    }
+}
+
+impl fmt::Display for FilePathError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.msg) // show the stored reason, not a fixed "invalid filename"
+    }
+}
+
+fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
+    // Fork numbers produced here: main = 0, fsm = 1, vm = 2, init = 3.
+    let name = match forkname {
+        // "main" never appears in a filename; an absent suffix means the main fork
+        None => return Ok(0),
+        Some(name) => name,
+    };
+    let known = ["fsm", "vm", "init"].iter().position(|k| *k == name);
+    known.map(|i| i as u32 + 1).ok_or_else(|| FilePathError::new("invalid forkname"))
+}
+
 #[derive(Debug)]
 struct ParsedBaseImageFileName {
    pub spcnode: u32,
    pub dbnode: u32,
    pub relnode: u32,
-    pub forknum: u8,
+    pub forknum: u32,
    pub segno: u32,

    pub lsn: u64,
 }

+fn parse_lsn_from_filename(fname: &str) -> Result<u64, FilePathError> {
+    // The LSN is the trailing 16 hex digits. Slice with `get` so that a name that
+    // is too short, or whose tail is not plain ASCII, is an error and not a panic.
+    let bad = || FilePathError::new("invalid filename: missing LSN suffix");
+    let tail = fname.len().checked_sub(16).and_then(|at| fname.get(at..)).ok_or_else(bad)?;
+    let (lsnhi, lsnlo) = (tail.get(..8).ok_or_else(bad)?, tail.get(8..).ok_or_else(bad)?);
+    let lsn_hi = u64::from_str_radix(lsnhi, 16)?;
+    let lsn_lo = u64::from_str_radix(lsnlo, 16)?;
+    Ok(lsn_hi << 32 | lsn_lo)
+}
+
 // formats:
 // <oid>
 // <oid>_<fork name>
 // <oid>.<segment number>
 // <oid>_<fork name>.<segment number>

-fn parse_filename(fname: &str) -> Result<(u32, u8, u32, u64), FilePathError> {
+fn parse_filename(fname: &str) -> Result<(u32, u32, u32, u64), FilePathError> {
    let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?_(?P<lsnhi>[[:xdigit:]]{8})(?P<lsnlo>[[:xdigit:]]{8})$").unwrap();

    let caps = re
@@ -154,23 +250,68 @@ fn parse_filename(fname: &str) -> Result<(u32, u8, u32, u64), FilePathError> {
        .ok_or_else(|| FilePathError::new("invalid relation data file name"))?;

    let relnode_str = caps.name("relnode").unwrap().as_str();
-    let relnode: u32 = relnode_str.parse()?;
+    let relnode = u32::from_str_radix(relnode_str, 10)?;

-    let forkname = caps.name("forkname").map(|f| f.as_str());
+    let forkname_match = caps.name("forkname");
+    let forkname = if forkname_match.is_none() {
+        None
+    } else {
+        Some(forkname_match.unwrap().as_str())
+    };
    let forknum = forkname_to_forknum(forkname)?;

    let segno_match = caps.name("segno");
    let segno = if segno_match.is_none() {
        0
    } else {
-        segno_match.unwrap().as_str().parse::<u32>()?
+        u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
    };

-    let lsn_hi: u64 = caps.name("lsnhi").unwrap().as_str().parse()?;
-    let lsn_lo: u64 = caps.name("lsnlo").unwrap().as_str().parse()?;
+    let lsn_hi = u64::from_str_radix(caps.name("lsnhi").unwrap().as_str(), 16)?;
+    let lsn_lo = u64::from_str_radix(caps.name("lsnlo").unwrap().as_str(), 16)?;
    let lsn = lsn_hi << 32 | lsn_lo;

-    Ok((relnode, forknum, segno, lsn))
+    return Ok((relnode, forknum, segno, lsn));
+}
+
+fn parse_nonrel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathError> {
+    // Non-relation files are told apart by their directory prefix alone: each
+    // prefix below selects the fork number the file's pages are stored under.
+    // Only the LSN suffix of the file name is parsed; spcnode, dbnode, relnode
+    // and segno are always reported as 0.
+    //
+    //TODO parse segno from xact filenames too
+    let known_dirs = [
+        ("pg_xact/", pg_constants::PG_XACT_FORKNUM),
+        (
+            "pg_multixact/offsets",
+            pg_constants::PG_MXACT_OFFSETS_FORKNUM,
+        ),
+        (
+            "pg_multixact/members",
+            pg_constants::PG_MXACT_MEMBERS_FORKNUM,
+        ),
+    ];
+
+    // The first matching prefix wins, in the order listed above.
+    let matched = known_dirs
+        .iter()
+        .find_map(|(dir, forknum)| path.strip_prefix(*dir).map(|fname| (fname, *forknum)));
+
+    match matched {
+        Some((fname, forknum)) => {
+            let lsn = parse_lsn_from_filename(fname)?;
+            Ok(ParsedBaseImageFileName {
+                spcnode: 0,
+                dbnode: 0,
+                relnode: 0,
+                forknum,
+                segno: 0,
+                lsn,
+            })
+        }
+        None => Err(FilePathError::new("invalid non relation data file name")),
+    }
 }

 fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathError> {
@@ -192,22 +333,48 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
     * <oid>.<segment number>
     */
    if let Some(fname) = path.strip_prefix("global/") {
+        if fname.contains("pg_control") {
+            let lsn = parse_lsn_from_filename(fname.clone())?;
+
+            return Ok(ParsedBaseImageFileName {
+                spcnode: pg_constants::GLOBALTABLESPACE_OID,
+                dbnode: 0,
+                relnode: 0,
+                forknum: pg_constants::PG_CONTROLFILE_FORKNUM,
+                segno: 0,
+                lsn,
+            });
+        }
+
+        if fname.contains("pg_filenode") {
+            let lsn = parse_lsn_from_filename(fname.clone())?;
+
+            return Ok(ParsedBaseImageFileName {
+                spcnode: pg_constants::GLOBALTABLESPACE_OID,
+                dbnode: 0,
+                relnode: 0,
+                forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
+                segno: 0,
+                lsn,
+            });
+        }
+
        let (relnode, forknum, segno, lsn) = parse_filename(fname)?;

-        Ok(ParsedBaseImageFileName {
+        return Ok(ParsedBaseImageFileName {
            spcnode: pg_constants::GLOBALTABLESPACE_OID,
            dbnode: 0,
            relnode,
            forknum,
            segno,
            lsn,
-        })
+        });
    } else if let Some(dbpath) = path.strip_prefix("base/") {
        let mut s = dbpath.split("/");
        let dbnode_str = s
            .next()
            .ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
-        let dbnode: u32 = dbnode_str.parse()?;
+        let dbnode = u32::from_str_radix(dbnode_str, 10)?;
        let fname = s
            .next()
            .ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
@@ -215,21 +382,34 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
            return Err(FilePathError::new("invalid relation data file name"));
        };

+        if fname.contains("pg_filenode") {
+            let lsn = parse_lsn_from_filename(fname.clone())?;
+
+            return Ok(ParsedBaseImageFileName {
+                spcnode: pg_constants::DEFAULTTABLESPACE_OID,
+                dbnode,
+                relnode: 0,
+                forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
+                segno: 0,
+                lsn,
+            });
+        }
+
        let (relnode, forknum, segno, lsn) = parse_filename(fname)?;

-        Ok(ParsedBaseImageFileName {
+        return Ok(ParsedBaseImageFileName {
            spcnode: pg_constants::DEFAULTTABLESPACE_OID,
            dbnode,
            relnode,
            forknum,
            segno,
            lsn,
-        })
+        });
    } else if let Some(_) = path.strip_prefix("pg_tblspc/") {
        // TODO
-        Err(FilePathError::new("tablespaces not supported"))
+        return Err(FilePathError::new("tablespaces not supported"));
    } else {
-        Err(FilePathError::new("invalid relation data file name"))
+        return Err(FilePathError::new("invalid relation data file name"));
    }
 }

@@ -252,23 +432,55 @@ async fn slurp_base_file(

    let mut bytes = BytesMut::from(data.as_slice()).freeze();

-    let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
+    let pcache = page_cache::get_pagecache(conf.clone(), sys_id);

-    let pcache = page_cache::get_pagecache(conf, sys_id);
-
-    while bytes.remaining() >= 8192 {
-        let tag = page_cache::BufferTag {
-            rel: page_cache::RelTag {
+    // pg_filenode.map has non-standard size - 512 bytes
+    if parsed.forknum == pg_constants::PG_FILENODEMAP_FORKNUM {
+        let b = bytes.clone();
+        controlfile::decode_filemapping(b);
+        while bytes.remaining() >= 512 {
+            let tag = page_cache::BufferTag {
                spcnode: parsed.spcnode,
                dbnode: parsed.dbnode,
                relnode: parsed.relnode,
-                forknum: parsed.forknum,
-            },
-            blknum,
+                forknum: parsed.forknum as u8,
+                blknum: 0,
+            };
+
+            pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(512));
+        }
+
+        let tag = page_cache::RelTag {
+            spcnode: parsed.spcnode,
+            dbnode: parsed.dbnode,
+            relnode: parsed.relnode,
+            forknum: parsed.forknum as u8,
        };

-        pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));
+        pcache.relsize_inc(&tag, Some(0));
+    } else {
+        // FIXME: use constants (BLCKSZ)
+        let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / 8192);
+        let reltag = page_cache::RelTag {
+            spcnode: parsed.spcnode,
+            dbnode: parsed.dbnode,
+            relnode: parsed.relnode,
+            forknum: parsed.forknum as u8,
+        };

-        blknum += 1;
+        while bytes.remaining() >= 8192 {
+            let tag = page_cache::BufferTag {
+                spcnode: parsed.spcnode,
+                dbnode: parsed.dbnode,
+                relnode: parsed.relnode,
+                forknum: parsed.forknum as u8,
+                blknum: blknum,
+            };
+
+            pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));
+            pcache.relsize_inc(&reltag, Some(blknum));
+
+            blknum += 1;
+        }
    }
 }
--- a/pageserver/src/tui.rs
+++ b/pageserver/src/tui.rs
@@ -14,6 +14,7 @@ use tui::text::{Span, Spans, Text};
 use tui::widgets::{Block, BorderType, Borders, Paragraph, Widget};
 use tui::Terminal;

+use slog;
 use slog::Drain;

 lazy_static! {
@@ -31,7 +32,7 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
            {
                return true;
            }
-            false
+            return false;
        })
        .fuse();

@@ -41,7 +42,7 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
        {
            return true;
        }
-        false
+        return false;
    })
    .fuse();

@@ -52,7 +53,7 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
            {
                return true;
            }
-            false
+            return false;
        })
        .fuse();

@@ -65,7 +66,7 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
        {
            return true;
        }
-        false
+        return false;
    })
    .fuse();

@@ -84,14 +85,14 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
            return true;
        }

-        false
+        return false;
    })
    .fuse();
    let logger = slog::Logger::root(drain, slog::o!());
-    slog_scope::set_global_logger(logger)
+    return slog_scope::set_global_logger(logger);
 }

-pub fn ui_main() -> Result<(), Box<dyn Error>> {
+pub fn ui_main<'b>() -> Result<(), Box<dyn Error>> {
    // Terminal initialization
    let stdout = io::stdout().into_raw_mode()?;
    let stdout = MouseTerminal::from(stdout);
@@ -171,11 +172,6 @@ pub fn ui_main() -> Result<(), Box<dyn Error>> {
        })?;

        // If ther user presses 'q', quit.
-
-        // silence clippy's suggestion to rewrite this as an if-statement. Match
-        // makes more sense as soon as we get another command than 'q'.
-        #[allow(clippy::single_match)]
-        #[allow(clippy::collapsible_match)]
        if let Event::Input(key) = events.next()? {
            match key {
                Key::Char('q') => {
@@ -192,7 +188,6 @@ pub fn ui_main() -> Result<(), Box<dyn Error>> {
    Ok(())
 }

-#[allow(dead_code)]
 struct LogWidget<'a> {
    logger: &'a TuiLogger,
    title: &'a str,
@@ -234,7 +229,7 @@ impl<'a> Widget for LogWidget<'a> {
 // Render a widget to show some metrics
 struct MetricsWidget {}

-fn _get_metric_u64(title: &str, value: u64) -> Spans {
+fn get_metric_u64<'a>(title: &'a str, value: u64) -> Spans<'a> {
    Spans::from(vec![
        Span::styled(format!("{:<20}", title), Style::default()),
        Span::raw(": "),
@@ -245,9 +240,7 @@ fn _get_metric_u64(title: &str, value: u64) -> Spans {
    ])
 }

-// This is not used since LSNs were removed from page cache stats.
-// Maybe it will be used in the future?
-fn _get_metric_str<'a>(title: &str, value: &'a str) -> Spans<'a> {
+fn get_metric_str<'a>(title: &'a str, value: &'a str) -> Spans<'a> {
    Spans::from(vec![
        Span::styled(format!("{:<20}", title), Style::default()),
        Span::raw(": "),
@@ -255,6 +248,13 @@ fn _get_metric_str<'a>(title: &str, value: &'a str) -> Spans<'a> {
    ])
 }

+// FIXME: We really should define a datatype for LSNs, with Display trait and
+// helper functions. There's one in tokio-postgres, but I don't think we want
+// to rely on that.
+fn format_lsn(lsn: u64) -> String {
+    return format!("{:X}/{:X}", lsn >> 32, lsn & 0xffff_ffff);
+}
+
 impl tui::widgets::Widget for MetricsWidget {
    fn render(self, area: Rect, buf: &mut Buffer) {
        let block = Block::default()
@@ -265,24 +265,17 @@ impl tui::widgets::Widget for MetricsWidget {

        block.render(area, buf);

-        #[allow(unused_mut)]
        let mut lines: Vec<Spans> = Vec::new();

-        // FIXME
-        //let page_cache_stats = crate::page_cache::get_stats();
-
-        // This is not used since LSNs were removed from page cache stats.
-        // Maybe it will be used in the future?
-        /*
+        let page_cache_stats = crate::page_cache::get_stats();
        let lsnrange = format!(
            "{} - {}",
-            page_cache_stats.first_valid_lsn, page_cache_stats.last_valid_lsn
+            format_lsn(page_cache_stats.first_valid_lsn),
+            format_lsn(page_cache_stats.last_valid_lsn)
        );
-        let last_valid_recordlsn_str = page_cache_stats.last_record_lsn.to_string();
+        let last_valid_recordlsn_str = format_lsn(page_cache_stats.last_record_lsn);
        lines.push(get_metric_str("Valid LSN range", &lsnrange));
        lines.push(get_metric_str("Last record LSN", &last_valid_recordlsn_str));
-        */
-        /*
        lines.push(get_metric_u64(
            "# of cache entries",
            page_cache_stats.num_entries,
@@ -299,7 +292,7 @@ impl tui::widgets::Widget for MetricsWidget {
            "# of GetPage@LSN calls",
            page_cache_stats.num_getpage_requests,
        ));
-        */
+
        let text = Text::from(lines);

        Paragraph::new(text).render(inner_area, buf);
--- a/pageserver/src/tui_event.rs
+++ b/pageserver/src/tui_event.rs
@@ -10,6 +10,7 @@ use std::time::Duration;
 use termion::event::Key;
 use termion::input::TermRead;

+#[allow(dead_code)]
 pub enum Event<I> {
    Input(I),
    Tick,
@@ -76,8 +77,8 @@ impl Events {
        };
        Events {
            rx,
-            input_handle,
            ignore_exit_key,
+            input_handle,
            tick_handle,
        }
    }
--- a/pageserver/src/tui_logger.rs
+++ b/pageserver/src/tui_logger.rs
@@ -10,6 +10,7 @@
 //
 use chrono::offset::Local;
 use chrono::DateTime;
+use slog;
 use slog::{Drain, Level, OwnedKVList, Record};
 use slog_async::AsyncRecord;
 use std::collections::VecDeque;
@@ -51,7 +52,7 @@ impl Drain for TuiLogger {
            events.pop_back();
        }

-        Ok(())
+        return Ok(());
    }
 }

@@ -80,7 +81,7 @@ impl<'b> TuiLoggerWidget<'b> {
            style_trace: None,
            style_info: None,
            show_module: true,
-            logger,
+            logger: logger,
        }
    }
 }
@@ -167,7 +168,7 @@ impl<'b> Widget for TuiLoggerWidget<'b> {
                        Level::Debug => (self.style_debug, "DEBUG", true),
                        Level::Trace => (self.style_trace, "TRACE", true),
                    };
-                    line.push(Span::styled(txt, lvl_style.unwrap_or_default()));
+                    line.push(Span::styled(txt, lvl_style.unwrap_or(Style::default())));

                    if self.show_module {
                        line.push(Span::raw(" "));
--- a/pageserver/src/waldecoder.rs
+++ b/pageserver/src/waldecoder.rs
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -1,211 +1,168 @@
-//!
-//! WAL receiver
-//!
-//! The WAL receiver connects to the WAL safekeeper service, and streams WAL.
-//! For each WAL record, it decodes the record to figure out which data blocks
-//! the record affects, and adds the records to the page cache.
-//!
+//
+// WAL receiver
+//
+// The WAL receiver connects to the WAL safekeeper service, and streams WAL.
+// For each WAL record, it decodes the record to figure out which data blocks
+// the record affects, and adds the records to the page cache.
+//
+use log::*;
+
+use tokio::runtime;
+use tokio::time::{sleep, Duration};
+use tokio_stream::StreamExt;

 use crate::page_cache;
-use crate::repository::*;
-use crate::waldecoder::*;
+use crate::page_cache::BufferTag;
+use crate::waldecoder::WalStreamDecoder;
 use crate::PageServerConf;
-use crate::ZTimelineId;
-use anyhow::Error;
-use lazy_static::lazy_static;
-use log::*;
-use postgres::fallible_iterator::FallibleIterator;
-use postgres::replication::ReplicationIter;
-use postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
-use postgres_ffi::xlog_utils::*;
-use postgres_ffi::*;
+
 use postgres_protocol::message::backend::ReplicationMessage;
-use postgres_types::PgLsn;
-use std::collections::HashMap;
-use std::fs;
-use std::fs::{File, OpenOptions};
-use std::io::{Seek, SeekFrom, Write};
-use std::path::PathBuf;
-use std::str::FromStr;
-use std::sync::Mutex;
-use std::thread;
-use std::thread::sleep;
-use std::time::{Duration, SystemTime};
-use zenith_utils::lsn::Lsn;
-
-//
-// We keep one WAL Receiver active per timeline.
-//
-struct WalReceiverEntry {
-    wal_producer_connstr: String,
-}
-
-lazy_static! {
-    static ref WAL_RECEIVERS: Mutex<HashMap<ZTimelineId, WalReceiverEntry>> =
-        Mutex::new(HashMap::new());
-}
-
-// Launch a new WAL receiver, or tell one that's running about change in connection string
-pub fn launch_wal_receiver(
-    conf: &'static PageServerConf,
-    timelineid: ZTimelineId,
-    wal_producer_connstr: &str,
-) {
-    let mut receivers = WAL_RECEIVERS.lock().unwrap();
-
-    match receivers.get_mut(&timelineid) {
-        Some(receiver) => {
-            receiver.wal_producer_connstr = wal_producer_connstr.into();
-        }
-        None => {
-            let receiver = WalReceiverEntry {
-                wal_producer_connstr: wal_producer_connstr.into(),
-            };
-            receivers.insert(timelineid, receiver);
-
-            // Also launch a new thread to handle this connection
-            let _walreceiver_thread = thread::Builder::new()
-                .name("WAL receiver thread".into())
-                .spawn(move || {
-                    thread_main(conf, timelineid);
-                })
-                .unwrap();
-        }
-    };
-}
-
-// Look up current WAL producer connection string in the hash table
-fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
-    let receivers = WAL_RECEIVERS.lock().unwrap();
-
-    receivers
-        .get(&timelineid)
-        .unwrap()
-        .wal_producer_connstr
-        .clone()
-}
+use tokio_postgres::{connect_replication, Error, NoTls, ReplicationMode};

 //
 // This is the entry point for the WAL receiver thread.
 //
-fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId) {
-    info!(
-        "WAL receiver thread started for timeline : '{}'",
-        timelineid
-    );
+pub fn thread_main(conf: PageServerConf, wal_producer_connstr: &String) {
+    info!("WAL receiver thread started: '{}'", wal_producer_connstr);

-    //
-    // Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
-    // and start streaming WAL from it. If the connection is lost, keep retrying.
-    //
-    loop {
-        // Look up the current WAL producer address
-        let wal_producer_connstr = get_wal_producer_connstr(timelineid);
+    let runtime = runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .unwrap();

-        let res = walreceiver_main(conf, timelineid, &wal_producer_connstr);
+    runtime.block_on(async {
+        loop {
+            let _res = walreceiver_main(conf.clone(), wal_producer_connstr).await;

-        if let Err(e) = res {
+            // TODO: print/log the error
            info!(
-                "WAL streaming connection failed ({}), retrying in 1 second",
-                e
+                "WAL streaming connection failed, retrying in 1 second...: {:?}",
+                _res
            );
-            sleep(Duration::from_secs(1));
+            sleep(Duration::from_secs(1)).await;
        }
-    }
+    });
 }

-fn walreceiver_main(
-    _conf: &PageServerConf,
-    timelineid: ZTimelineId,
-    wal_producer_connstr: &str,
+async fn walreceiver_main(
+    conf: PageServerConf,
+    wal_producer_connstr: &String,
 ) -> Result<(), Error> {
    // Connect to the database in replication mode.
-    info!("connecting to {:?}", wal_producer_connstr);
-    let connect_cfg = format!("{} replication=true", wal_producer_connstr);
+    debug!("connecting to {}...", wal_producer_connstr);
+    let (mut rclient, connection) = connect_replication(
+        wal_producer_connstr.as_str(),
+        NoTls,
+        ReplicationMode::Physical,
+    )
+    .await?;
+    debug!("connected!");

-    let mut rclient = Client::connect(&connect_cfg, NoTls)?;
-    info!("connected!");
+    // The connection object performs the actual communication with the database,
+    // so spawn it off to run on its own.
+    tokio::spawn(async move {
+        if let Err(e) = connection.await {
+            error!("connection error: {}", e);
+        }
+    });

-    let identify = identify_system(&mut rclient)?;
-    info!("{:?}", identify);
-    let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
+    let identify_system = rclient.identify_system().await?;
+    let end_of_wal = u64::from(identify_system.xlogpos());
    let mut caught_up = false;

-    let repository = page_cache::get_repository();
-    let timeline = repository.get_timeline(timelineid).unwrap();
+    let sysid: u64 = identify_system.systemid().parse().unwrap();
+    let pcache = page_cache::get_pagecache(conf, sysid);

    //
    // Start streaming the WAL, from where we left off previously.
    //
-    // If we had previously received WAL up to some point in the middle of a WAL record, we
-    // better start from the end of last full WAL record, not in the middle of one. Hence,
-    // use 'last_record_lsn' rather than 'last_valid_lsn' here.
-    let last_rec_lsn = timeline.get_last_record_lsn();
-    let mut startpoint = last_rec_lsn;
-
-    if startpoint == Lsn(0) {
-        error!("No previous WAL position");
-    }
-
-    startpoint = Lsn::max(
-        startpoint,
-        Lsn(end_of_wal.0 & !(pg_constants::WAL_SEGMENT_SIZE as u64 - 1)),
-    );
-
-    // There might be some padding after the last full record, skip it.
-    //
-    // FIXME: It probably would be better to always start streaming from the beginning
-    // of the page, or the segment, so that we could check the page/segment headers
-    // too. Just for the sake of paranoia.
-    startpoint += startpoint.calc_padding(8u32);
-
-    debug!(
-        "last_record_lsn {} starting replication from {} for timeline {}, server is at {}...",
-        last_rec_lsn, startpoint, timelineid, end_of_wal
-    );
-
-    let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
-
-    let copy_stream = rclient.copy_both_simple(&query)?;
-    let mut physical_stream = ReplicationIter::new(copy_stream);
-
-    let mut waldecoder = WalStreamDecoder::new(startpoint);
-
-    let mut checkpoint = CheckPoint::new(startpoint.0, identify.timeline);
-    let checkpoint_tag = BufferTag::fork(pg_constants::PG_CHECKPOINT_FORKNUM);
-    if let Some(checkpoint_bytes) = timeline.get_page_image(checkpoint_tag, Lsn(0))? {
-        checkpoint = decode_checkpoint(checkpoint_bytes)?;
-        trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
+    let mut startpoint = pcache.get_last_valid_lsn();
+    if startpoint == 0 {
+        // If we started here from identify_system.xlogpos(), we would have a race condition
+        // with postgres startup: an insert into postgres may request a page that was modified
+        // at an LSN smaller than identify_system.xlogpos().
+        //
+        // The current procedure for starting postgres will be changed anyway to something
+        // different, like having an 'initdb' method on the pageserver (or importing some shared
+        // empty database snapshot), so for now I just use the start of the first segment, which
+        // seems to be a valid record.
+        pcache.init_valid_lsn(0x_1_000_000_u64);
+        startpoint = u64::from(0x_1_000_000_u64);
    } else {
-        error!("No checkpoint record was found in reposistory");
+        // There might be some padding after the last full record, skip it.
+        //
+        // FIXME: It probably would be better to always start streaming from the beginning
+        // of the page, or the segment, so that we could check the page/segment headers
+        // too. Just for the sake of paranoia.
+        if startpoint % 8 != 0 {
+            startpoint += 8 - (startpoint % 8);
+        }
    }
-    while let Some(replication_message) = physical_stream.next()? {
-        match replication_message {
+    debug!(
+        "starting replication from {:X}/{:X}, server is at {:X}/{:X}...",
+        (startpoint >> 32),
+        (startpoint & 0xffffffff),
+        (end_of_wal >> 32),
+        (end_of_wal & 0xffffffff)
+    );
+    let startpoint = tokio_postgres::types::Lsn::from(startpoint);
+    let mut physical_stream = rclient
+        .start_physical_replication(None, startpoint, None)
+        .await?;
+    let mut waldecoder = WalStreamDecoder::new(u64::from(startpoint));
+
+    while let Some(replication_message) = physical_stream.next().await {
+        match replication_message? {
            ReplicationMessage::XLogData(xlog_data) => {
                // Pass the WAL data to the decoder, and see if we can decode
                // more records as a result.
                let data = xlog_data.data();
-                let startlsn = Lsn::from(xlog_data.wal_start());
+                let startlsn = xlog_data.wal_start();
                let endlsn = startlsn + data.len() as u64;

-                write_wal_file(startlsn, timelineid, pg_constants::WAL_SEGMENT_SIZE, data)?;
-
-                trace!("received XLogData between {} and {}", startlsn, endlsn);
+                trace!(
+                    "received XLogData between {:X}/{:X} and {:X}/{:X}",
+                    (startlsn >> 32),
+                    (startlsn & 0xffffffff),
+                    (endlsn >> 32),
+                    (endlsn & 0xffffffff)
+                );

                waldecoder.feed_bytes(data);

-                while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                    let old_checkpoint_bytes = encode_checkpoint(checkpoint);
-                    let decoded = decode_wal_record(&mut checkpoint, recdata.clone());
-                    timeline.save_decoded_record(decoded, recdata, lsn)?;
+                loop {
+                    if let Some((lsn, recdata)) = waldecoder.poll_decode() {
+                        let decoded =
+                            crate::waldecoder::decode_wal_record(startlsn, recdata.clone());

-                    let new_checkpoint_bytes = encode_checkpoint(checkpoint);
-                    if new_checkpoint_bytes != old_checkpoint_bytes {
-                        timeline.put_page_image(checkpoint_tag, Lsn(0), new_checkpoint_bytes);
+                        // Put the WAL record to the page cache. We make a separate copy of
+                        // it for every block it modifies. (The actual WAL record is kept in
+                        // a Bytes, which uses a reference counter for the underlying buffer,
+                        // so having multiple copies of it doesn't cost that much)
+                        for blk in decoded.blocks.iter() {
+                            let tag = BufferTag {
+                                spcnode: blk.rnode_spcnode,
+                                dbnode: blk.rnode_dbnode,
+                                relnode: blk.rnode_relnode,
+                                forknum: blk.forknum as u8,
+                                blknum: blk.blkno,
+                            };
+
+                            let rec = page_cache::WALRecord {
+                                lsn: lsn,
+                                will_init: blk.will_init || blk.apply_image,
+                                rec: recdata.clone(),
+                            };
+
+                            pcache.put_wal_record(tag, rec);
+                        }
+
+                        // Now that this record has been handled, let the page cache know that
+                        // it is up-to-date to this LSN
+                        pcache.advance_last_valid_lsn(lsn);
+                    } else {
+                        break;
                    }
-                    // Now that this record has been handled, let the page cache know that
-                    // it is up-to-date to this LSN
-                    timeline.advance_last_record_lsn(lsn);
                }

                // Update the last_valid LSN value in the page cache one more time. We updated
@@ -214,180 +171,24 @@ fn walreceiver_main(
                // better reflect that, because GetPage@LSN requests might also point in the
                // middle of a record, if the request LSN was taken from the server's current
                // flush ptr.
-                timeline.advance_last_valid_lsn(endlsn);
+                pcache.advance_last_valid_lsn(endlsn);

                if !caught_up && endlsn >= end_of_wal {
-                    info!("caught up at LSN {}", endlsn);
+                    info!(
+                        "caught up at LSN {:X}/{:X}",
+                        (endlsn >> 32),
+                        (endlsn & 0xffffffff)
+                    );
                    caught_up = true;
                }
            }

-            ReplicationMessage::PrimaryKeepAlive(keepalive) => {
-                let wal_end = keepalive.wal_end();
-                let timestamp = keepalive.timestamp();
-                let reply_requested: bool = keepalive.reply() != 0;
-
-                trace!(
-                    "received PrimaryKeepAlive(wal_end: {}, timestamp: {:?} reply: {})",
-                    wal_end,
-                    timestamp,
-                    reply_requested,
-                );
-                if reply_requested {
-                    // TODO: More thought should go into what values are sent here.
-                    let last_lsn = PgLsn::from(u64::from(timeline.get_last_valid_lsn()));
-                    let write_lsn = last_lsn;
-                    let flush_lsn = last_lsn;
-                    let apply_lsn = PgLsn::from(0);
-                    let ts = SystemTime::now();
-                    const NO_REPLY: u8 = 0u8;
-
-                    physical_stream
-                        .standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)?;
-                }
+            ReplicationMessage::PrimaryKeepAlive(_keepalive) => {
+                trace!("received PrimaryKeepAlive");
+                // FIXME: Reply, or the connection will time out
            }
            _ => (),
        }
    }
-    Ok(())
-}
-
-/// Data returned from the postgres `IDENTIFY_SYSTEM` command
-///
-/// See the [postgres docs] for more details.
-///
-/// [postgres docs]: https://www.postgresql.org/docs/current/protocol-replication.html
-#[derive(Debug)]
-pub struct IdentifySystem {
-    systemid: u64,
-    timeline: u32,
-    xlogpos: PgLsn,
-    dbname: Option<String>,
-}
-
-/// There was a problem parsing the response to
-/// a postgres IDENTIFY_SYSTEM command.
-#[derive(Debug, thiserror::Error)]
-#[error("IDENTIFY_SYSTEM parse error")]
-pub struct IdentifyError;
-
-/// Run the postgres `IDENTIFY_SYSTEM` command
-pub fn identify_system(client: &mut Client) -> Result<IdentifySystem, Error> {
-    let query_str = "IDENTIFY_SYSTEM";
-    let response = client.simple_query(query_str)?;
-
-    // get(N) from row, then parse it as some destination type.
-    fn get_parse<T>(row: &SimpleQueryRow, idx: usize) -> Result<T, IdentifyError>
-    where
-        T: FromStr,
-    {
-        let val = row.get(idx).ok_or(IdentifyError)?;
-        val.parse::<T>().or(Err(IdentifyError))
-    }
-
-    // extract the row contents into an IdentifySystem struct.
-    // written as a closure so I can use ? for Option here.
-    if let Some(SimpleQueryMessage::Row(first_row)) = response.get(0) {
-        Ok(IdentifySystem {
-            systemid: get_parse(first_row, 0)?,
-            timeline: get_parse(first_row, 1)?,
-            xlogpos: get_parse(first_row, 2)?,
-            dbname: get_parse(first_row, 3).ok(),
-        })
-    } else {
-        Err(IdentifyError.into())
-    }
-}
-
-fn write_wal_file(
-    startpos: Lsn,
-    timeline: ZTimelineId,
-    wal_seg_size: usize,
-    buf: &[u8],
-) -> anyhow::Result<()> {
-    let mut bytes_left: usize = buf.len();
-    let mut bytes_written: usize = 0;
-    let mut partial;
-    let mut start_pos = startpos;
-    const ZERO_BLOCK: &[u8] = &[0u8; XLOG_BLCKSZ];
-
-    let wal_dir = PathBuf::from(format!("timelines/{}/wal", timeline));
-
-    /* Extract WAL location for this block */
-    let mut xlogoff = start_pos.segment_offset(wal_seg_size);
-
-    while bytes_left != 0 {
-        let bytes_to_write;
-
-        /*
-         * If crossing a WAL boundary, only write up until we reach wal
-         * segment size.
-         */
-        if xlogoff + bytes_left > wal_seg_size {
-            bytes_to_write = wal_seg_size - xlogoff;
-        } else {
-            bytes_to_write = bytes_left;
-        }
-
-        /* Open file */
-        let segno = start_pos.segment_number(wal_seg_size);
-        let wal_file_name = XLogFileName(
-            1, // FIXME: always use Postgres timeline 1
-            segno,
-            wal_seg_size,
-        );
-        let wal_file_path = wal_dir.join(wal_file_name.clone());
-        let wal_file_partial_path = wal_dir.join(wal_file_name.clone() + ".partial");
-
-        {
-            let mut wal_file: File;
-            /* Try to open already completed segment */
-            if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_path) {
-                wal_file = file;
-                partial = false;
-            } else if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_partial_path) {
-                /* Try to open existed partial file */
-                wal_file = file;
-                partial = true;
-            } else {
-                /* Create and fill new partial file */
-                partial = true;
-                match OpenOptions::new()
-                    .create(true)
-                    .write(true)
-                    .open(&wal_file_partial_path)
-                {
-                    Ok(mut file) => {
-                        for _ in 0..(wal_seg_size / XLOG_BLCKSZ) {
-                            file.write_all(&ZERO_BLOCK)?;
-                        }
-                        wal_file = file;
-                    }
-                    Err(e) => {
-                        error!("Failed to open log file {:?}: {}", &wal_file_path, e);
-                        return Err(e.into());
-                    }
-                }
-            }
-            wal_file.seek(SeekFrom::Start(xlogoff as u64))?;
-            wal_file.write_all(&buf[bytes_written..(bytes_written + bytes_to_write)])?;
-
-            // FIXME: Flush the file
-            //wal_file.sync_all()?;
-        }
-        /* Write was successful, advance our position */
-        bytes_written += bytes_to_write;
-        bytes_left -= bytes_to_write;
-        start_pos += bytes_to_write as u64;
-        xlogoff += bytes_to_write;
-
-        /* Did we reach the end of a WAL segment? */
-        if start_pos.segment_offset(wal_seg_size) == 0 {
-            xlogoff = 0;
-            if partial {
-                fs::rename(&wal_file_partial_path, &wal_file_path)?;
-            }
-        }
-    }
-    Ok(())
+    return Ok(());
 }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -1,522 +1,162 @@
-//!
-//! WAL redo
-//!
-//! We rely on Postgres to perform WAL redo for us. We launch a
-//! postgres process in special "wal redo" mode that's similar to
-//! single-user mode. We then pass the the previous page image, if any,
-//! and all the WAL records we want to apply, to the postgres
-//! process. Then we get the page image back. Communication with the
-//! postgres process happens via stdin/stdout
-//!
-//! See src/backend/tcop/zenith_wal_redo.c for the other side of
-//! this communication.
-//!
-//! TODO: Even though the postgres code runs in a separate process,
-//! it's not a secure sandbox.
-//!
-use byteorder::{ByteOrder, LittleEndian};
-use bytes::{Buf, BufMut, Bytes, BytesMut};
+//
+// WAL redo
+//
+// We rely on Postgres to perform WAL redo for us. We launch a
+// postgres process in special "wal redo" mode that's similar to
+// single-user mode. We then pass the previous page image, if any,
+// and all the WAL records we want to apply, to the postgres
+// process. Then we get the page image back. Communication with the
+// postgres process happens via stdin/stdout
+//
+// See src/backend/tcop/zenith_wal_redo.c for the other side of
+// this communication.
+//
+// TODO: Even though the postgres code runs in a separate process,
+// it's not a secure sandbox.
+//
 use log::*;
 use std::assert;
 use std::cell::RefCell;
 use std::fs;
-use std::fs::OpenOptions;
-use std::io::prelude::*;
 use std::io::Error;
-use std::path::{Path, PathBuf};
-use std::process::Stdio;
-use std::sync::mpsc;
-use std::sync::Mutex;
+use std::sync::Arc;
 use std::time::Duration;
 use std::time::Instant;
+use std::{path::PathBuf, process::Stdio};
 use tokio::io::AsyncBufReadExt;
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
-use tokio::process::{ChildStdin, ChildStdout, Command};
+use tokio::process::{Child, ChildStdin, ChildStdout, Command};
+use tokio::runtime::Runtime;
 use tokio::time::timeout;
-use zenith_utils::lsn::Lsn;

-use crate::repository::BufferTag;
-use crate::repository::WALRecord;
-use crate::waldecoder::{MultiXactId, XlMultiXactCreate};
-use crate::PageServerConf;
-use postgres_ffi::nonrelfile_utils::transaction_id_set_status;
-use postgres_ffi::pg_constants;
-use postgres_ffi::xlog_utils::XLogRecord;
+use bytes::{BufMut, Bytes, BytesMut};

-///
-/// WAL Redo Manager is responsible for replaying WAL records.
-///
-/// Callers use the WAL redo manager through this abstract interface,
-/// which makes it easy to mock it in tests.
-pub trait WalRedoManager: Send + Sync {
-    /// Apply some WAL records.
-    ///
-    /// The caller passes an old page image, and WAL records that should be
-    /// applied over it. The return value is a new page image, after applying
-    /// the reords.
-    fn request_redo(
-        &self,
-        tag: BufferTag,
-        lsn: Lsn,
-        base_img: Option<Bytes>,
-        records: Vec<WALRecord>,
-    ) -> Result<Bytes, WalRedoError>;
-}
+use crate::page_cache;
+use crate::page_cache::CacheEntry;
+use crate::page_cache::WALRecord;
+use crate::{page_cache::BufferTag, PageServerConf};

 static TIMEOUT: Duration = Duration::from_secs(20);

-///
-/// The implementation consists of two parts: PostgresRedoManager, and
-/// PostgresRedoManagerInternal. PostgresRedoManager is the public struct
-/// that can be used to send redo requests to the manager.
-/// PostgresRedoManagerInternal is used by the manager thread itself.
-///
-pub struct PostgresRedoManager {
-    request_tx: Mutex<mpsc::Sender<WalRedoRequest>>,
-}
+//
+// Main entry point for the WAL applicator thread.
+//
+pub fn wal_redo_main(conf: PageServerConf, sys_id: u64) {
+    info!("WAL redo thread started {}", sys_id);

-struct PostgresRedoManagerInternal {
-    conf: &'static PageServerConf,
+    // We block on waiting for requests on the walredo request channel, but
+    // use async I/O to communicate with the child process. Initialize the
+    // runtime for the async part.
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .unwrap();

-    request_rx: mpsc::Receiver<WalRedoRequest>,
-}
+    let pcache = page_cache::get_pagecache(conf.clone(), sys_id);

-#[derive(Debug)]
-struct WalRedoRequest {
-    tag: BufferTag,
-    lsn: Lsn,
+    // Loop forever, handling requests as they come.
+    let walredo_channel_receiver = &pcache.walredo_receiver;
+    loop {
+        let mut process: WalRedoProcess;
+        let datadir = conf.data_dir.join(format!("wal-redo/{}", sys_id));

-    base_img: Option<Bytes>,
-    records: Vec<WALRecord>,
-
-    response_channel: mpsc::Sender<Result<Bytes, WalRedoError>>,
-}
-
-/// An error happened in WAL redo
-#[derive(Debug, thiserror::Error)]
-pub enum WalRedoError {
-    #[error(transparent)]
-    IoError(#[from] std::io::Error),
-}
-
-///
-/// Public interface of WAL redo manager
-///
-impl PostgresRedoManager {
-    ///
-    /// Create a new PostgresRedoManager.
-    ///
-    /// This launches a new thread to handle the requests.
-    pub fn new(conf: &'static PageServerConf) -> PostgresRedoManager {
-        let (tx, rx) = mpsc::channel();
-
-        //
-        // Launch the WAL redo thread
-        //
-        // Get mutable references to the values that we need to pass to the
-        // thread.
-        let request_rx = rx;
-
-        // Currently, the join handle is not saved anywhere and we
-        // won't try restart the thread if it dies.
-        let _walredo_thread = std::thread::Builder::new()
-            .name("WAL redo thread".into())
-            .spawn(move || {
-                let mut internal = PostgresRedoManagerInternal { conf, request_rx };
-                internal.wal_redo_main();
-            })
-            .unwrap();
-
-        PostgresRedoManager {
-            request_tx: Mutex::new(tx),
+        info!("launching WAL redo postgres process {}", sys_id);
+        {
+            let _guard = runtime.enter();
+            process = WalRedoProcess::launch(&datadir, &runtime).unwrap();
        }
-    }
-}

-impl WalRedoManager for PostgresRedoManager {
-    ///
-    /// Request the WAL redo manager to apply some WAL records
-    ///
-    /// The WAL redo is handled by a separate thread, so this just sends a request
-    /// to the thread and waits for response.
-    ///
-    fn request_redo(
-        &self,
-        tag: BufferTag,
-        lsn: Lsn,
-        base_img: Option<Bytes>,
-        records: Vec<WALRecord>,
-    ) -> Result<Bytes, WalRedoError> {
-        // Create a channel where to receive the response
-        let (tx, rx) = mpsc::channel::<Result<Bytes, WalRedoError>>();
+        // Pretty arbitrarily, reuse the same Postgres process for at most 99 requests.
+        // After that, kill it and start a new one. This is mostly to avoid
+        // using up all shared buffers in Postgres's shared buffer cache; we don't
+        // want to write any pages to disk in the WAL redo process.
+        for _i in 1..100 {
+            let request = walredo_channel_receiver.recv().unwrap();

-        let request = WalRedoRequest {
-            tag,
-            lsn,
-            base_img,
-            records,
-            response_channel: tx,
-        };
-
-        self.request_tx
-            .lock()
-            .unwrap()
-            .send(request)
-            .expect("could not send WAL redo request");
-
-        rx.recv()
-            .expect("could not receive response to WAL redo request")
-    }
-}
-
-fn mx_offset_to_flags_offset(xid: MultiXactId) -> usize {
-    return ((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32) as u16
-        % pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE
-        * pg_constants::MULTIXACT_MEMBERGROUP_SIZE) as usize;
-}
-
-fn mx_offset_to_flags_bitshift(xid: MultiXactId) -> u16 {
-    return (xid as u16) % pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP
-        * pg_constants::MXACT_MEMBER_BITS_PER_XACT;
-}
-
-/* Location (byte offset within page) of TransactionId of given member */
-fn mx_offset_to_member_offset(xid: MultiXactId) -> usize {
-    return mx_offset_to_flags_offset(xid)
-        + (pg_constants::MULTIXACT_FLAGBYTES_PER_GROUP
-            + (xid as u16 % pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP) * 4)
-            as usize;
-}
-
-///
-/// WAL redo thread
-///
-impl PostgresRedoManagerInternal {
-    //
-    // Main entry point for the WAL applicator thread.
-    //
-    fn wal_redo_main(&mut self) {
-        info!("WAL redo thread started");
-
-        // We block on waiting for requests on the walredo request channel, but
-        // use async I/O to communicate with the child process. Initialize the
-        // runtime for the async part.
-        let runtime = tokio::runtime::Builder::new_current_thread()
-            .enable_all()
-            .build()
-            .unwrap();
-
-        let process: PostgresRedoProcess;
-
-        // FIXME: We need a dummy Postgres cluster to run the process in. Currently, we
-        // just create one with constant name. That fails if you try to launch more than
-        // one WAL redo manager concurrently.
-        let datadir = self.conf.workdir.join("wal-redo-datadir");
-
-        info!("launching WAL redo postgres process");
-
-        process = runtime
-            .block_on(PostgresRedoProcess::launch(&datadir))
-            .unwrap();
-
-        // Loop forever, handling requests as they come.
-        loop {
-            let request = self
-                .request_rx
-                .recv()
-                .expect("WAL redo request channel was closed");
-
-            let result = runtime.block_on(self.handle_apply_request(&process, &request));
-            let result_ok = result.is_ok();
-
-            // Send the result to the requester
-            let _ = request.response_channel.send(result);
-
-            if !result_ok {
-                error!("wal-redo-postgres failed to apply request {:?}", request);
+            let result = handle_apply_request(&pcache, &process, &runtime, request);
+            if result.is_err() {
+                // On error, kill the process.
+                break;
            }
        }
-    }

-    ///
-    /// Process one request for WAL redo.
-    ///
-    async fn handle_apply_request(
-        &self,
-        process: &PostgresRedoProcess,
-        request: &WalRedoRequest,
-    ) -> Result<Bytes, WalRedoError> {
-        let tag = request.tag;
-        let lsn = request.lsn;
-        let base_img = request.base_img.clone();
-        let records = &request.records;
-
-        let nrecords = records.len();
-
-        let start = Instant::now();
-
-        let apply_result: Result<Bytes, Error>;
-        if tag.rel.forknum > pg_constants::INIT_FORKNUM {
-            const ZERO_PAGE: [u8; 8192] = [0u8; 8192];
-            let mut page = BytesMut::new();
-            if let Some(fpi) = base_img {
-                page.extend_from_slice(&fpi[..]);
-            } else {
-                page.extend_from_slice(&ZERO_PAGE);
-            }
-            for record in records {
-                let mut buf = record.rec.clone();
-
-                // 1. Parse XLogRecord struct
-                // FIXME: refactor to avoid code duplication.
-                let xlogrec = XLogRecord::from_bytes(&mut buf);
-
-                //move to main data
-                // TODO probably, we should store some records in our special format
-                // to avoid this weird parsing on replay
-                let skip = (record.main_data_offset - pg_constants::SIZEOF_XLOGRECORD) as usize;
-                if buf.remaining() > skip {
-                    buf.advance(skip);
-                }
-
-                if xlogrec.xl_rmid == pg_constants::RM_CLOG_ID {
-                    let info = xlogrec.xl_info & !pg_constants::XLR_INFO_MASK;
-                    if info == pg_constants::CLOG_ZEROPAGE {
-                        page.copy_from_slice(&ZERO_PAGE);
-                    }
-                } else if xlogrec.xl_rmid == pg_constants::RM_XACT_ID {
-                    let info = xlogrec.xl_info & pg_constants::XLOG_XACT_OPMASK;
-                    let mut status = 0;
-                    if info == pg_constants::XLOG_XACT_COMMIT || info == pg_constants::XLOG_XACT_COMMIT_PREPARED {
-                        status = pg_constants::TRANSACTION_STATUS_COMMITTED;
-						if info == pg_constants::XLOG_XACT_COMMIT {
-							transaction_id_set_status(xlogrec.xl_xid, status, &mut page);
-						}
-                        //handle subtrans
-                        let _xact_time = buf.get_i64_le();
-                        let mut xinfo = 0;
-                        if xlogrec.xl_info & pg_constants::XLOG_XACT_HAS_INFO != 0 {
-                            xinfo = buf.get_u32_le();
-                            if xinfo & pg_constants::XACT_XINFO_HAS_DBINFO != 0 {
-                                let _dbid = buf.get_u32_le();
-                                let _tsid = buf.get_u32_le();
-                            }
-                        }
-
-                        if xinfo & pg_constants::XACT_XINFO_HAS_SUBXACTS != 0 {
-                            let nsubxacts = buf.get_i32_le();
-                            for _i in 0..nsubxacts {
-                                let subxact = buf.get_u32_le();
-                                let blkno = subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
-                                // only update xids on the requested page
-                                if tag.blknum == blkno {
-                                    status = pg_constants::TRANSACTION_STATUS_SUB_COMMITTED;
-                                    transaction_id_set_status(subxact, status, &mut page);
-                                }
-                            }
-                        }
-						if info == pg_constants::XLOG_XACT_COMMIT_PREPARED {
-							if xinfo & pg_constants::XACT_XINFO_HAS_RELFILENODES != 0 {
-								let nrels = buf.get_i32_le();
-								for _i in 0..nrels {
-									let spcnode = buf.get_u32_le();
-									let dbnode = buf.get_u32_le();
-									let relnode = buf.get_u32_le();
-									//TODO handle this too?
-									trace!(
-										"XLOG_XACT_COMMIT relfilenode {}/{}/{}",
-										spcnode,
-										dbnode,
-										relnode
-									);
-								}
-							}
-							if xinfo & pg_constants::XACT_XINFO_HAS_INVALS != 0 {
-								let nmsgs = buf.get_i32_le();
-								for _i in 0..nmsgs {
-									let sizeof_shared_invalidation_message = 0;
-									buf.advance(sizeof_shared_invalidation_message);
-								}
-							}
-							assert!((xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE) != 0);
-							let xid = buf.get_u32_le();
-							transaction_id_set_status(xid, status, &mut page);
-						}
-                    } else if info == pg_constants::XLOG_XACT_ABORT || info == pg_constants::XLOG_XACT_ABORT_PREPARED {
-                        status = pg_constants::TRANSACTION_STATUS_ABORTED;
-						if info == pg_constants::XLOG_XACT_ABORT {
-							transaction_id_set_status(xlogrec.xl_xid, status, &mut page);
-						}
-                        //handle subtrans
-                        let _xact_time = buf.get_i64_le();
-                        let mut xinfo = 0;
-                        if xlogrec.xl_info & pg_constants::XLOG_XACT_HAS_INFO != 0 {
-                            xinfo = buf.get_u32_le();
-                            if xinfo & pg_constants::XACT_XINFO_HAS_DBINFO != 0 {
-                                let _dbid = buf.get_u32_le();
-                                let _tsid = buf.get_u32_le();
-                            }
-                        }
-
-                        if xinfo & pg_constants::XACT_XINFO_HAS_SUBXACTS != 0 {
-                            let nsubxacts = buf.get_i32_le();
-                            for _i in 0..nsubxacts {
-                                let subxact = buf.get_u32_le();
-                                let blkno = subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
-                                // only update xids on the requested page
-                                if tag.blknum == blkno {
-                                    status = pg_constants::TRANSACTION_STATUS_ABORTED;
-                                    transaction_id_set_status(subxact, status, &mut page);
-                                }
-                            }
-                        }
-						if info == pg_constants::XLOG_XACT_ABORT_PREPARED {
-							if xinfo & pg_constants::XACT_XINFO_HAS_RELFILENODES != 0 {
-								let nrels = buf.get_i32_le();
-								for _i in 0..nrels {
-									let spcnode = buf.get_u32_le();
-									let dbnode = buf.get_u32_le();
-									let relnode = buf.get_u32_le();
-									//TODO handle this too?
-									trace!(
-										"XLOG_XACT_COMMIT relfilenode {}/{}/{}",
-										spcnode,
-										dbnode,
-										relnode
-									);
-								}
-							}
-							if xinfo & pg_constants::XACT_XINFO_HAS_INVALS != 0 {
-								let nmsgs = buf.get_i32_le();
-								for _i in 0..nmsgs {
-									let sizeof_shared_invalidation_message = 0;
-									buf.advance(sizeof_shared_invalidation_message);
-								}
-							}
-							assert!((xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE) != 0);
-							let xid = buf.get_u32_le();
-							transaction_id_set_status(xid, status, &mut page);
-						}
-                    } else if info == pg_constants::XLOG_XACT_PREPARE {
-						info!("Apply prepare {} record", xlogrec.xl_xid);
-						page.clear();
-						page.extend_from_slice(&buf[..]);
-					} else {
-                        error!("handle_apply_request for RM_XACT_ID-{} NOT SUPPORTED YET. RETURN. lsn {} main_data_offset {}, rec.len {}",
-                               status,
-                               record.lsn,
-                               record.main_data_offset, record.rec.len());
-                    }
-                } else if xlogrec.xl_rmid == pg_constants::RM_MULTIXACT_ID {
-                    let info = xlogrec.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
-                    if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE {
-                        page.copy_from_slice(&ZERO_PAGE);
-                    } else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE {
-                        page.copy_from_slice(&ZERO_PAGE);
-                    } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
-                        let xlrec = XlMultiXactCreate::decode(&mut buf);
-                        if tag.rel.forknum == pg_constants::PG_MXACT_OFFSETS_FORKNUM {
-                            let offs = (xlrec.mid % pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32
-                                * 4) as usize;
-                            LittleEndian::write_u32(&mut page[offs..offs + 4], xlrec.moff);
-                        } else {
-                            assert!(tag.rel.forknum == pg_constants::PG_MXACT_MEMBERS_FORKNUM);
-                            for i in 0..xlrec.nmembers {
-                                let blkno = i / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;
-                                if blkno == tag.blknum {
-                                    // update only target block
-                                    let offset = xlrec.moff + i;
-                                    let memberoff = mx_offset_to_member_offset(offset);
-                                    let flagsoff = mx_offset_to_flags_offset(offset);
-                                    let bshift = mx_offset_to_flags_bitshift(offset);
-                                    let mut flagsval =
-                                        LittleEndian::read_u32(&page[flagsoff..flagsoff + 4]);
-                                    flagsval &=
-                                        !(((1 << pg_constants::MXACT_MEMBER_BITS_PER_XACT) - 1)
-                                            << bshift);
-                                    flagsval |= xlrec.members[i as usize].status << bshift;
-                                    LittleEndian::write_u32(
-                                        &mut page[flagsoff..flagsoff + 4],
-                                        flagsval,
-                                    );
-                                    LittleEndian::write_u32(
-                                        &mut page[memberoff..memberoff + 4],
-                                        xlrec.members[i as usize].xid,
-                                    );
-                                }
-                            }
-                        }
-                    } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {
-                        // empty page image indicates that this SLRU page is truncated and can be removed by GC
-                        page.clear();
-                    } else {
-                        assert!(false);
-                    }
-                } else if xlogrec.xl_rmid == pg_constants::RM_RELMAP_ID {
-                    page.clear();
-                    page.extend_from_slice(&buf[12..]); // skip xl_relmap_update
-                    assert!(page.len() == 512); // size of pg_filenode.map
-                }
-            }
-
-            apply_result = Ok::<Bytes, Error>(page.freeze());
-        } else {
-            apply_result = process.apply_wal_records(tag, base_img, records).await;
-        }
-
-        let duration = start.elapsed();
-
-        let result: Result<Bytes, WalRedoError>;
-
-        trace!(
-            "applied {} WAL records in {} ms to reconstruct page image at LSN {}",
-            nrecords,
-            duration.as_millis(),
-            lsn
-        );
-
-        if let Err(e) = apply_result {
-            error!("could not apply WAL records: {}", e);
-            result = Err(WalRedoError::IoError(e));
-        } else {
-            let img = apply_result.unwrap();
-
-            result = Ok(img);
-        }
-
-        // The caller is responsible for sending the response
-        result
+        info!("killing WAL redo postgres process");
+        let _ = runtime.block_on(process.stdin.get_mut().shutdown());
+        let mut child = process.child;
+        drop(process.stdin);
+        let _ = runtime.block_on(child.wait());
    }
 }

-struct PostgresRedoProcess {
+fn handle_apply_request(
+    pcache: &page_cache::PageCache,
+    process: &WalRedoProcess,
+    runtime: &Runtime,
+    entry_rc: Arc<CacheEntry>,
+) -> Result<(), Error> {
+    let tag = entry_rc.key.tag;
+    let lsn = entry_rc.key.lsn;
+    let (base_img, records) = pcache.collect_records_for_apply(entry_rc.as_ref());
+
+    let mut entry = entry_rc.content.lock().unwrap();
+    entry.apply_pending = false;
+
+    let nrecords = records.len();
+
+    let start = Instant::now();
+    let apply_result = process.apply_wal_records(runtime, tag, base_img, records);
+    let duration = start.elapsed();
+
+    let result;
+
+    debug!(
+        "applied {} WAL records in {} ms to reconstruct page image at LSN {:X}/{:X}",
+        nrecords,
+        duration.as_millis(),
+        lsn >> 32,
+        lsn & 0xffff_ffff
+    );
+
+    if let Err(e) = apply_result {
+        error!("could not apply WAL records: {}", e);
+        result = Err(e);
+    } else {
+        entry.page_image = Some(apply_result.unwrap());
+        pcache
+            .num_page_images
+            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        result = Ok(());
+    }
+
+    // Wake up the requester, whether the operation succeeded or not.
+    entry_rc.walredo_condvar.notify_all();
+
+    return result;
+}
+
+struct WalRedoProcess {
+    child: Child,
    stdin: RefCell<ChildStdin>,
    stdout: RefCell<ChildStdout>,
 }

-impl PostgresRedoProcess {
+impl WalRedoProcess {
    //
    // Start postgres binary in special WAL redo mode.
    //
    // Tests who run pageserver binary are setting proper PG_BIN_DIR
-    // and PG_LIB_DIR so that WalRedo would start right postgres.
-
-    // do that: We may later
+    // and PG_LIB_DIR so that WalRedo would start the right postgres. We may later
    // switch to setting same things in pageserver config file.
-    async fn launch(datadir: &Path) -> Result<PostgresRedoProcess, Error> {
-        // Create empty data directory for wal-redo postgres, deleting old one first.
-        if datadir.exists() {
-            info!("directory {:?} exists, removing", &datadir);
-            if let Err(e) = fs::remove_dir_all(&datadir) {
-                error!("could not remove old wal-redo-datadir: {:?}", e);
-            }
-        }
-        info!("running initdb in {:?}", datadir.display());
-        let initdb = Command::new("initdb")
-            .args(&["-D", datadir.to_str().unwrap()])
-            .arg("-N")
-            .output()
-            .await
+    fn launch(datadir: &PathBuf, runtime: &Runtime) -> Result<WalRedoProcess, Error> {
+        // Create an empty data directory for wal-redo postgres, deleting the old one first.
+        fs::remove_dir_all(datadir.to_str().unwrap()).ok();
+        let initdb = runtime
+            .block_on(
+                Command::new("initdb")
+                    .args(&["-D", datadir.to_str().unwrap()])
+                    .arg("-N")
+                    .output(),
+            )
            .expect("failed to execute initdb");

        if !initdb.status.success() {
@@ -525,29 +165,21 @@ impl PostgresRedoProcess {
                std::str::from_utf8(&initdb.stdout).unwrap(),
                std::str::from_utf8(&initdb.stderr).unwrap()
            );
-        } else {
-            // Limit shared cache for wal-redo-postres
-            let mut config = OpenOptions::new()
-                .append(true)
-                .open(PathBuf::from(&datadir).join("postgresql.conf"))?;
-            config.write_all(b"shared_buffers=128kB\n")?;
-            config.write_all(b"fsync=off\n")?;
-            config.write_all(b"shared_preload_libraries=zenith\n")?;
-            config.write_all(b"zenith.wal_redo=on\n")?;
        }
+
        // Start postgres itself
        let mut child = Command::new("postgres")
            .arg("--wal-redo")
            .stdin(Stdio::piped())
            .stderr(Stdio::piped())
            .stdout(Stdio::piped())
-            .env("PGDATA", datadir)
+            .env("PGDATA", datadir.to_str().unwrap())
            .spawn()
            .expect("postgres --wal-redo command failed to start");

        info!(
-            "launched WAL redo postgres process on {:?}",
-            datadir.display()
+            "launched WAL redo postgres process on {}",
+            datadir.to_str().unwrap()
        );

        let stdin = child.stdin.take().expect("failed to open child's stdin");
@@ -568,14 +200,15 @@ impl PostgresRedoProcess {
                if res.unwrap() == 0 {
                    break;
                }
-                error!("wal-redo-postgres: {}", line.trim());
+                debug!("wal-redo-postgres: {}", line.trim());
                line.clear();
            }
            Ok::<(), Error>(())
        };
        tokio::spawn(f_stderr);

-        Ok(PostgresRedoProcess {
+        Ok(WalRedoProcess {
+            child: child,
            stdin: RefCell::new(stdin),
            stdout: RefCell::new(stdout),
        })
@@ -585,132 +218,146 @@ impl PostgresRedoProcess {
    // Apply given WAL records ('records') over an old page image. Returns
    // new page image.
    //
-    async fn apply_wal_records(
+    fn apply_wal_records(
        &self,
+        runtime: &Runtime,
        tag: BufferTag,
        base_img: Option<Bytes>,
-        records: &[WALRecord],
-    ) -> Result<Bytes, std::io::Error> {
+        records: Vec<WALRecord>,
+    ) -> Result<Bytes, Error> {
        let mut stdin = self.stdin.borrow_mut();
        let mut stdout = self.stdout.borrow_mut();
-
-        // We do three things simultaneously: send the old base image and WAL records to
-        // the child process's stdin, read the result from child's stdout, and forward any logging
-        // information that the child writes to its stderr to the page server's log.
-        //
-        // 'f_stdin' handles writing the base image and WAL records to the child process.
-        // 'f_stdout' below reads the result back. And 'f_stderr', which was spawned into the
-        // tokio runtime in the 'launch' function already, forwards the logging.
-        let f_stdin = async {
-            // Send base image, if any. (If the record initializes the page, previous page
-            // version is not needed.)
-            timeout(
-                TIMEOUT,
-                stdin.write_all(&build_begin_redo_for_block_msg(tag)),
-            )
-            .await??;
-            if base_img.is_some() {
+        return runtime.block_on(async {
+            //
+            // This async block sends all the commands to the process.
+            //
+            // For reasons I don't understand, this needs to be a "move" block;
+            // otherwise the stdin pipe doesn't get closed, despite the shutdown()
+            // call.
+            //
+            let f_stdin = async {
+                // Send base image, if any. (If the record initializes the page, previous page
+                // version is not needed.)
                timeout(
                    TIMEOUT,
-                    stdin.write_all(&build_push_page_msg(tag, base_img.unwrap())),
+                    stdin.write_all(&build_begin_redo_for_block_msg(tag)),
                )
                .await??;
-            }
+                if base_img.is_some() {
+                    timeout(
+                        TIMEOUT,
+                        stdin.write_all(&build_push_page_msg(tag, base_img.unwrap())),
+                    )
+                    .await??;
+                }

-            // Send WAL records.
-            for rec in records.iter() {
-                let r = rec.clone();
+                // Send WAL records.
+                for rec in records.iter() {
+                    let r = rec.clone();

-                stdin
-                    .write_all(&build_apply_record_msg(r.lsn, r.rec))
-                    .await?;
+                    stdin
+                        .write_all(&build_apply_record_msg(r.lsn, r.rec))
+                        .await?;

-                //debug!("sent WAL record to wal redo postgres process ({:X}/{:X}",
-                //       r.lsn >> 32, r.lsn & 0xffff_ffff);
-            }
-            //debug!("sent {} WAL records to wal redo postgres process ({:X}/{:X}",
-            //       records.len(), lsn >> 32, lsn & 0xffff_ffff);
+                    //debug!("sent WAL record to wal redo postgres process ({:X}/{:X}",
+                    //       r.lsn >> 32, r.lsn & 0xffff_ffff);
+                }
+                //debug!("sent {} WAL records to wal redo postgres process ({:X}/{:X}",
+                //       records.len(), lsn >> 32, lsn & 0xffff_ffff);

-            // Send GetPage command to get the result back
-            timeout(TIMEOUT, stdin.write_all(&build_get_page_msg(tag))).await??;
-            timeout(TIMEOUT, stdin.flush()).await??;
-            //debug!("sent GetPage for {}", tag.blknum);
-            Ok::<(), Error>(())
-        };
+                // Send GetPage command to get the result back
+                timeout(TIMEOUT, stdin.write_all(&build_get_page_msg(tag))).await??;
+                timeout(TIMEOUT, stdin.flush()).await??;
+                //debug!("sent GetPage for {}", tag.blknum);
+                Ok::<(), Error>(())
+            };

-        // Read back new page image
-        let f_stdout = async {
-            let mut buf = [0u8; 8192];
+            // Read back new page image
+            let f_stdout = async {
+                let mut buf = [0u8; 8192];

-            timeout(TIMEOUT, stdout.read_exact(&mut buf)).await??;
-            //debug!("got response for {}", tag.blknum);
-            Ok::<[u8; 8192], Error>(buf)
-        };
+                timeout(TIMEOUT, stdout.read_exact(&mut buf)).await??;
+                //debug!("got response for {}", tag.blknum);
+                Ok::<[u8; 8192], Error>(buf)
+            };

-        let res = tokio::try_join!(f_stdout, f_stdin)?;
+            // Kill the process. This closes its stdin, which should signal the process
+            // to terminate. TODO: SIGKILL if needed
+            //child.wait();

-        let buf = res.0;
+            let res = futures::try_join!(f_stdout, f_stdin)?;

-        Ok::<Bytes, Error>(Bytes::from(std::vec::Vec::from(buf)))
+            let buf = res.0;
+
+            Ok::<Bytes, Error>(Bytes::from(std::vec::Vec::from(buf)))
+        });
    }
 }

-// Functions for constructing messages to send to the postgres WAL redo
-// process. See vendor/postgres/src/backend/tcop/zenith_wal_redo.c for
-// explanation of the protocol.
-
 fn build_begin_redo_for_block_msg(tag: BufferTag) -> Bytes {
-    let len = 4 + 1 + 4 * 4;
+    let len = 4 + 5 * 4;
    let mut buf = BytesMut::with_capacity(1 + len);

-    buf.put_u8(b'B');
+    buf.put_u8('B' as u8);
    buf.put_u32(len as u32);
-    tag.pack(&mut buf);
+    buf.put_u32(tag.spcnode);
+    buf.put_u32(tag.dbnode);
+    buf.put_u32(tag.relnode);
+    buf.put_u32(tag.forknum as u32);
+    buf.put_u32(tag.blknum);

    assert!(buf.len() == 1 + len);

-    buf.freeze()
+    return buf.freeze();
 }

 fn build_push_page_msg(tag: BufferTag, base_img: Bytes) -> Bytes {
    assert!(base_img.len() == 8192);

-    let len = 4 + 1 + 4 * 4 + base_img.len();
+    let len = 4 + 5 * 4 + base_img.len();
    let mut buf = BytesMut::with_capacity(1 + len);

-    buf.put_u8(b'P');
+    buf.put_u8('P' as u8);
    buf.put_u32(len as u32);
-    tag.pack(&mut buf);
+    buf.put_u32(tag.spcnode);
+    buf.put_u32(tag.dbnode);
+    buf.put_u32(tag.relnode);
+    buf.put_u32(tag.forknum as u32);
+    buf.put_u32(tag.blknum);
    buf.put(base_img);

    assert!(buf.len() == 1 + len);

-    buf.freeze()
+    return buf.freeze();
 }

-fn build_apply_record_msg(endlsn: Lsn, rec: Bytes) -> Bytes {
+fn build_apply_record_msg(endlsn: u64, rec: Bytes) -> Bytes {
    let len = 4 + 8 + rec.len();
    let mut buf = BytesMut::with_capacity(1 + len);

-    buf.put_u8(b'A');
+    buf.put_u8('A' as u8);
    buf.put_u32(len as u32);
-    buf.put_u64(endlsn.0);
+    buf.put_u64(endlsn);
    buf.put(rec);

    assert!(buf.len() == 1 + len);

-    buf.freeze()
+    return buf.freeze();
 }

 fn build_get_page_msg(tag: BufferTag) -> Bytes {
-    let len = 4 + 1 + 4 * 4;
+    let len = 4 + 5 * 4;
    let mut buf = BytesMut::with_capacity(1 + len);

-    buf.put_u8(b'G');
+    buf.put_u8('G' as u8);
    buf.put_u32(len as u32);
-    tag.pack(&mut buf);
+    buf.put_u32(tag.spcnode);
+    buf.put_u32(tag.dbnode);
+    buf.put_u32(tag.relnode);
+    buf.put_u32(tag.forknum as u32);
+    buf.put_u32(tag.blknum);

    assert!(buf.len() == 1 + len);

-    buf.freeze()
+    return buf.freeze();
 }
--- a/pgbuild.sh
+++ b/pgbuild.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+#   The purpose of this script is to build and install postgres in a local directory
+# so that the zenith integration tests can find the pg binaries and support files.
+#
+# ./pgbuild.sh does the following:
+#
+#   1) runs an out-of-source build of postgres in the REPO_ROOT/tmp_install/build directory
+#  (the tmp_install path is reused here since it is already present in .gitignore)
+#
+#   2) installs postgres to REPO_ROOT/tmp_install/
+#
+REPO_ROOT=$(dirname "$0")
+REPO_ROOT="`( cd \"$REPO_ROOT\" && pwd )`"
+
+# configure
+echo "Configuring postgres build"
+mkdir -p $REPO_ROOT/tmp_install/build
+cd $REPO_ROOT/tmp_install/build
+../../vendor/postgres/configure CFLAGS='-O0' --enable-debug --enable-cassert \
+    --enable-depend --with-libxml --prefix=/ > configure.log
+
+# compile
+echo "Compiling postgres"
+make -j8 -s
+export DESTDIR=$REPO_ROOT/tmp_install
+
+echo "Installing postgres to $DESTDIR"
+make install -s
--- a/postgres_ffi/Cargo.toml
+++ b/postgres_ffi/Cargo.toml
@@ -1,24 +0,0 @@
-[package]
-name = "postgres_ffi"
-version = "0.1.0"
-authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-chrono = "0.4.19"
-rand = "0.8.3"
-regex = "1.4.5"
-bytes = "1.0.1"
-byteorder = "1.4.3"
-anyhow = "1.0"
-crc32c = "0.6.0"
-hex = "0.4.3"
-lazy_static = "1.4"
-log = "0.4.14"
-thiserror = "1.0"
-workspace_hack = { path = "../workspace_hack" }
-
-[build-dependencies]
-bindgen = "0.57"
--- a/postgres_ffi/README
+++ b/postgres_ffi/README
@@ -1,3 +0,0 @@
-This module contains utility functions for interacting with PostgreSQL
-file formats.
-
--- a/postgres_ffi/build.rs
+++ b/postgres_ffi/build.rs
@@ -1,44 +0,0 @@
-extern crate bindgen;
-
-use std::env;
-use std::path::PathBuf;
-
-fn main() {
-    // Tell cargo to invalidate the built crate whenever the wrapper changes
-    println!("cargo:rerun-if-changed=pg_control_ffi.h");
-
-    // The bindgen::Builder is the main entry point
-    // to bindgen, and lets you build up options for
-    // the resulting bindings.
-    let bindings = bindgen::Builder::default()
-        // The input header we would like to generate
-        // bindings for.
-        .header("pg_control_ffi.h")
-        // Tell cargo to invalidate the built crate whenever any of the
-        // included header files changed.
-        .parse_callbacks(Box::new(bindgen::CargoCallbacks))
-        .whitelist_type("ControlFileData")
-        .whitelist_type("CheckPoint")
-        .whitelist_type("FullTransactionId")
-        .whitelist_var("PG_CONTROL_FILE_SIZE")
-        .whitelist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
-        .whitelist_type("DBState")
-        // Path the server include dir. It is in tmp_install/include/server, if you did
-        // "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
-        // and used DESTDIR to move it into tmp_install, then it's in
-        // tmp_install/include/postgres/server
-        // 'pg_config --includedir-server' would perhaps be the more proper way to find it,
-        // but this will do for now.
-        .clang_arg("-I../tmp_install/include/server")
-        .clang_arg("-I../tmp_install/include/postgresql/server")
-        // Finish the builder and generate the bindings.
-        .generate()
-        // Unwrap the Result and panic on failure.
-        .expect("Unable to generate bindings");
-
-    // Write the bindings to the $OUT_DIR/bindings.rs file.
-    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
-    bindings
-        .write_to_file(out_path.join("bindings.rs"))
-        .expect("Couldn't write bindings!");
-}
--- a/postgres_ffi/pg_control_ffi.h
+++ b/postgres_ffi/pg_control_ffi.h
@@ -1,4 +0,0 @@
-#include "c.h"
-#include "catalog/pg_control.h"
-
-const uint32 PG_CONTROLFILEDATA_OFFSETOF_CRC = offsetof(ControlFileData, crc);
--- a/postgres_ffi/src/lib.rs
+++ b/postgres_ffi/src/lib.rs
@@ -1,112 +0,0 @@
-#![allow(non_upper_case_globals)]
-#![allow(non_camel_case_types)]
-#![allow(non_snake_case)]
-include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
-
-pub mod nonrelfile_utils;
-pub mod pg_constants;
-pub mod relfile_utils;
-pub mod xlog_utils;
-
-use bytes::{Buf, Bytes, BytesMut};
-
-// sizeof(ControlFileData)
-const SIZEOF_CONTROLDATA: usize = std::mem::size_of::<ControlFileData>();
-const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();
-const OFFSETOF_CRC: usize = PG_CONTROLFILEDATA_OFFSETOF_CRC as usize;
-
-impl ControlFileData {
-    // Initialize an all-zeros ControlFileData struct
-    pub fn new() -> ControlFileData {
-        let controlfile: ControlFileData;
-
-        let b = [0u8; SIZEOF_CONTROLDATA];
-        controlfile =
-            unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
-
-        controlfile
-    }
-}
-
-pub fn decode_pg_control(mut buf: Bytes) -> Result<ControlFileData, anyhow::Error> {
-    let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA];
-    buf.copy_to_slice(&mut b);
-
-    let controlfile: ControlFileData;
-
-    // TODO: verify CRC
-    let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
-    data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
-    let expectedcrc = crc32c::crc32c(&data_without_crc);
-
-    controlfile = unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
-
-    if expectedcrc != controlfile.crc {
-        anyhow::bail!(
-            "invalid CRC in control file: expected {:08X}, was {:08X}",
-            expectedcrc,
-            controlfile.crc
-        );
-    }
-
-    Ok(controlfile)
-}
-
-pub fn encode_pg_control(controlfile: ControlFileData) -> Bytes {
-    let b: [u8; SIZEOF_CONTROLDATA];
-
-    b = unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(controlfile) };
-
-    // Recompute the CRC
-    let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
-    data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
-    let newcrc = crc32c::crc32c(&data_without_crc);
-
-    let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize);
-
-    buf.extend_from_slice(&b[0..OFFSETOF_CRC]);
-    buf.extend_from_slice(&newcrc.to_ne_bytes());
-    // Fill the rest of the control file with zeros.
-    buf.resize(PG_CONTROL_FILE_SIZE as usize, 0);
-
-    buf.into()
-}
-
-pub fn encode_checkpoint(checkpoint: CheckPoint) -> Bytes {
-    let b: [u8; SIZEOF_CHECKPOINT];
-    b = unsafe { std::mem::transmute::<CheckPoint, [u8; SIZEOF_CHECKPOINT]>(checkpoint) };
-    return Bytes::copy_from_slice(&b[..]);
-}
-
-pub fn decode_checkpoint(mut buf: Bytes) -> Result<CheckPoint, anyhow::Error> {
-    let mut b = [0u8; SIZEOF_CHECKPOINT];
-    buf.copy_to_slice(&mut b);
-    let checkpoint: CheckPoint;
-    checkpoint = unsafe { std::mem::transmute::<[u8; SIZEOF_CHECKPOINT], CheckPoint>(b) };
-    Ok(checkpoint)
-}
-
-impl CheckPoint {
-    pub fn new(lsn: u64, timeline: u32) -> CheckPoint {
-        CheckPoint {
-            redo: lsn,
-            ThisTimeLineID: timeline,
-            PrevTimeLineID: timeline,
-            fullPageWrites: true, // TODO: get actual value of full_page_writes
-            nextXid: FullTransactionId {
-                value: pg_constants::FIRST_NORMAL_TRANSACTION_ID as u64,
-            }, // TODO: handle epoch?
-            nextOid: pg_constants::FIRST_BOOTSTRAP_OBJECT_ID,
-            nextMulti: 1,
-            nextMultiOffset: 0,
-            oldestXid: pg_constants::FIRST_NORMAL_TRANSACTION_ID,
-            oldestXidDB: 0,
-            oldestMulti: 1,
-            oldestMultiDB: 0,
-            time: 0,
-            oldestCommitTsXid: 0,
-            newestCommitTsXid: 0,
-            oldestActiveXid: pg_constants::INVALID_TRANSACTION_ID,
-        }
-    }
-}
--- a/postgres_ffi/src/nonrelfile_utils.rs
+++ b/postgres_ffi/src/nonrelfile_utils.rs
@@ -1,32 +0,0 @@
-//!
-//! Common utilities for dealing with PostgreSQL non-relation files.
-//!
-use crate::pg_constants;
-use bytes::BytesMut;
-use log::*;
-
-pub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {
-    trace!(
-        "handle_apply_request for RM_XACT_ID-{} (1-commit, 2-abort, 3-sub_commit)",
-        status
-    );
-
-    let byteno: usize = ((xid as u32 % pg_constants::CLOG_XACTS_PER_PAGE as u32)
-        / pg_constants::CLOG_XACTS_PER_BYTE) as usize;
-
-    let bshift: u8 =
-        ((xid % pg_constants::CLOG_XACTS_PER_BYTE) * pg_constants::CLOG_BITS_PER_XACT as u32) as u8;
-
-    page[byteno] =
-        (page[byteno] & !(pg_constants::CLOG_XACT_BITMASK << bshift)) | (status << bshift);
-}
-
-pub fn transaction_id_get_status(xid: u32, page: &[u8]) -> u8 {
-    let byteno: usize = ((xid as u32 % pg_constants::CLOG_XACTS_PER_PAGE as u32)
-        / pg_constants::CLOG_XACTS_PER_BYTE) as usize;
-
-    let bshift: u8 =
-        ((xid % pg_constants::CLOG_XACTS_PER_BYTE) * pg_constants::CLOG_BITS_PER_XACT as u32) as u8;
-
-    return ((page[byteno] >> bshift) & pg_constants::CLOG_XACT_BITMASK) as u8;
-}
--- a/postgres_ffi/src/pg_constants.rs
+++ b/postgres_ffi/src/pg_constants.rs
@@ -1,178 +0,0 @@
-//!
-//! Misc constants, copied from PostgreSQL headers.
-//!
-
-//
-// From pg_tablespace_d.h
-//
-pub const DEFAULTTABLESPACE_OID: u32 = 1663;
-pub const GLOBALTABLESPACE_OID: u32 = 1664;
-
-//
-// Fork numbers, from relpath.h
-//
-pub const MAIN_FORKNUM: u8 = 0;
-pub const FSM_FORKNUM: u8 = 1;
-pub const VISIBILITYMAP_FORKNUM: u8 = 2;
-pub const INIT_FORKNUM: u8 = 3;
-// Special values for non-rel files' tags (Zenith-specific)
-//Special values for non-rel files' tags
-pub const PG_CONTROLFILE_FORKNUM: u8 = 42;
-pub const PG_FILENODEMAP_FORKNUM: u8 = 43;
-pub const PG_XACT_FORKNUM: u8 = 44;
-pub const PG_MXACT_OFFSETS_FORKNUM: u8 = 45;
-pub const PG_MXACT_MEMBERS_FORKNUM: u8 = 46;
-pub const PG_TWOPHASE_FORKNUM: u8 = 47;
-pub const PG_CHECKPOINT_FORKNUM: u8 = 48;
-
-// From storage_xlog.h
-pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
-
-// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
-// --with-segsize=SEGSIZE, but assume the defaults for now.
-pub const BLCKSZ: u16 = 8192;
-pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
-
-//
-// constants from clog.h
-//
-pub const CLOG_XACTS_PER_BYTE: u32 = 4;
-pub const CLOG_XACTS_PER_PAGE: u32 = BLCKSZ as u32 * CLOG_XACTS_PER_BYTE;
-pub const CLOG_BITS_PER_XACT: u8 = 2;
-pub const CLOG_XACT_BITMASK: u8 = (1 << CLOG_BITS_PER_XACT) - 1;
-
-//
-// Constants from visbilitymap.h
-//
-pub const SIZE_OF_PAGE_HEADER: u16 = 24;
-pub const BITS_PER_HEAPBLOCK: u16 = 2;
-pub const HEAPBLOCKS_PER_PAGE: u16 = (BLCKSZ - SIZE_OF_PAGE_HEADER) * 8 / BITS_PER_HEAPBLOCK;
-
-pub const TRANSACTION_STATUS_IN_PROGRESS: u8 = 0x00;
-pub const TRANSACTION_STATUS_COMMITTED: u8 = 0x01;
-pub const TRANSACTION_STATUS_ABORTED: u8 = 0x02;
-pub const TRANSACTION_STATUS_SUB_COMMITTED: u8 = 0x03;
-
-pub const CLOG_ZEROPAGE: u8 = 0x00;
-pub const CLOG_TRUNCATE: u8 = 0x10;
-
-// From xact.h
-pub const XLOG_XACT_COMMIT: u8 = 0x00;
-pub const XLOG_XACT_PREPARE: u8 = 0x10;
-pub const XLOG_XACT_ABORT: u8 = 0x20;
-pub const XLOG_XACT_COMMIT_PREPARED: u8 = 0x30;
-pub const XLOG_XACT_ABORT_PREPARED: u8 = 0x40;
-
-// From srlu.h
-pub const SLRU_PAGES_PER_SEGMENT: u32 = 32;
-
-/* mask for filtering opcodes out of xl_info */
-pub const XLOG_XACT_OPMASK: u8 = 0x70;
-/* does this record have a 'xinfo' field or not */
-pub const XLOG_XACT_HAS_INFO: u8 = 0x80;
-
-/*
- * The following flags, stored in xinfo, determine which information is
- * contained in commit/abort records.
- */
-pub const XACT_XINFO_HAS_DBINFO: u32 = 1u32 << 0;
-pub const XACT_XINFO_HAS_SUBXACTS: u32 = 1u32 << 1;
-pub const XACT_XINFO_HAS_RELFILENODES: u32 = 1u32 << 2;
-pub const XACT_XINFO_HAS_INVALS: u32 = 1u32 << 3;
-pub const XACT_XINFO_HAS_TWOPHASE: u32 = 1u32 << 4;
-// pub const XACT_XINFO_HAS_ORIGIN: u32 = 1u32 << 5;
-// pub const XACT_XINFO_HAS_AE_LOCKS: u32 = 1u32 << 6;
-// pub const XACT_XINFO_HAS_GID: u32 = 1u32 << 7;
-
-// From pg_control.h and rmgrlist.h
-pub const XLOG_NEXTOID: u8 = 0x30;
-pub const XLOG_SWITCH: u8 = 0x40;
-pub const XLOG_SMGR_TRUNCATE: u8 = 0x20;
-
-// From multixact.h
-pub const XLOG_MULTIXACT_ZERO_OFF_PAGE: u8 = 0x00;
-pub const XLOG_MULTIXACT_ZERO_MEM_PAGE: u8 = 0x10;
-pub const XLOG_MULTIXACT_CREATE_ID: u8 = 0x20;
-pub const XLOG_MULTIXACT_TRUNCATE_ID: u8 = 0x30;
-
-pub const MULTIXACT_OFFSETS_PER_PAGE: u16 = BLCKSZ / 4;
-pub const MXACT_MEMBER_BITS_PER_XACT: u16 = 8;
-pub const MXACT_MEMBER_FLAGS_PER_BYTE: u16 = 1;
-pub const MULTIXACT_FLAGBYTES_PER_GROUP: u16 = 4;
-pub const MULTIXACT_MEMBERS_PER_MEMBERGROUP: u16 =
-    MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE;
-/* size in bytes of a complete group */
-pub const MULTIXACT_MEMBERGROUP_SIZE: u16 =
-    4 * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP;
-pub const MULTIXACT_MEMBERGROUPS_PER_PAGE: u16 = BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE;
-pub const MULTIXACT_MEMBERS_PER_PAGE: u16 =
-    MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-
-// From heapam_xlog.h
-pub const XLOG_HEAP_INSERT: u8 = 0x00;
-pub const XLOG_HEAP_DELETE: u8 = 0x10;
-pub const XLOG_HEAP_UPDATE: u8 = 0x20;
-pub const XLOG_HEAP_HOT_UPDATE: u8 = 0x40;
-pub const XLOG_HEAP2_VISIBLE: u8 = 0x40;
-pub const XLOG_HEAP2_MULTI_INSERT: u8 = 0x50;
-pub const XLH_INSERT_ALL_FROZEN_SET: u8 = (1 << 5) as u8;
-pub const XLH_INSERT_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
-pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
-pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
-pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
-
-pub const RM_XLOG_ID: u8 = 0;
-pub const RM_XACT_ID: u8 = 1;
-pub const RM_SMGR_ID: u8 = 2;
-pub const RM_CLOG_ID: u8 = 3;
-pub const RM_DBASE_ID: u8 = 4;
-pub const RM_TBLSPC_ID: u8 = 5;
-pub const RM_MULTIXACT_ID: u8 = 6;
-pub const RM_RELMAP_ID: u8 = 7;
-pub const RM_STANDBY_ID: u8 = 8;
-pub const RM_HEAP2_ID: u8 = 9;
-pub const RM_HEAP_ID: u8 = 10;
-
-// from xlogreader.h
-pub const XLR_INFO_MASK: u8 = 0x0F;
-pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;
-
-// from dbcommands_xlog.h
-pub const XLOG_DBASE_CREATE: u8 = 0x00;
-pub const XLOG_DBASE_DROP: u8 = 0x10;
-
-pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
-pub const XLOG_TBLSPC_DROP: u8 = 0x10;
-
-pub const SIZEOF_XLOGRECORD: u32 = 24;
-
-//
-// from xlogrecord.h
-//
-pub const XLR_MAX_BLOCK_ID: u8 = 32;
-
-pub const XLR_BLOCK_ID_DATA_SHORT: u8 = 255;
-pub const XLR_BLOCK_ID_DATA_LONG: u8 = 254;
-pub const XLR_BLOCK_ID_ORIGIN: u8 = 253;
-pub const XLR_BLOCK_ID_TOPLEVEL_XID: u8 = 252;
-
-pub const BKPBLOCK_FORK_MASK: u8 = 0x0F;
-pub const _BKPBLOCK_FLAG_MASK: u8 = 0xF0;
-pub const BKPBLOCK_HAS_IMAGE: u8 = 0x10; /* block data is an XLogRecordBlockImage */
-pub const BKPBLOCK_HAS_DATA: u8 = 0x20;
-pub const BKPBLOCK_WILL_INIT: u8 = 0x40; /* redo will re-init the page */
-pub const BKPBLOCK_SAME_REL: u8 = 0x80; /* RelFileNode omitted, same as previous */
-
-/* Information stored in bimg_info */
-pub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */
-pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
-pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
-
-/* From transam.h */
-pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;
-pub const INVALID_TRANSACTION_ID: u32 = 0;
-pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
-pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
-
-/* FIXME: pageserver should request wal_seg_size from compute node */
-pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
--- a/postgres_ffi/src/relfile_utils.rs
+++ b/postgres_ffi/src/relfile_utils.rs
@@ -1,151 +0,0 @@
-//!
-//! Common utilities for dealing with PostgreSQL relation files.
-//!
-use crate::pg_constants;
-use lazy_static::lazy_static;
-use regex::Regex;
-
-#[derive(Debug, Clone, thiserror::Error, PartialEq)]
-pub enum FilePathError {
-    #[error("invalid relation fork name")]
-    InvalidForkName,
-    #[error("invalid relation data file name")]
-    InvalidFileName,
-}
-
-impl From<core::num::ParseIntError> for FilePathError {
-    fn from(_e: core::num::ParseIntError) -> Self {
-        FilePathError::InvalidFileName
-    }
-}
-
-/// Convert Postgres relation file's fork suffix to fork number.
-pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
-    match forkname {
-        // "main" is not in filenames, it's implicit if the fork name is not present
-        None => Ok(pg_constants::MAIN_FORKNUM),
-        Some("fsm") => Ok(pg_constants::FSM_FORKNUM),
-        Some("vm") => Ok(pg_constants::VISIBILITYMAP_FORKNUM),
-        Some("init") => Ok(pg_constants::INIT_FORKNUM),
-        Some(_) => Err(FilePathError::InvalidForkName),
-    }
-}
-
-/// Convert Postgres fork number to the right suffix of the relation data file.
-pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
-    match forknum {
-        pg_constants::MAIN_FORKNUM => None,
-        pg_constants::FSM_FORKNUM => Some("fsm"),
-        pg_constants::VISIBILITYMAP_FORKNUM => Some("vm"),
-        pg_constants::INIT_FORKNUM => Some("init"),
-
-        // These should not appear in WAL records, but we use them internally,
-        // and need to be prepared to print them out in log messages and such
-        pg_constants::PG_CONTROLFILE_FORKNUM => Some("controlfile"),
-        pg_constants::PG_FILENODEMAP_FORKNUM => Some("filenodemap"),
-        pg_constants::PG_XACT_FORKNUM => Some("xact"),
-        pg_constants::PG_MXACT_OFFSETS_FORKNUM => Some("mxact_offsets"),
-        pg_constants::PG_MXACT_MEMBERS_FORKNUM => Some("mxact_members"),
-        pg_constants::PG_TWOPHASE_FORKNUM => Some("twophase"),
-
-        _ => Some("UNKNOWN FORKNUM"),
-    }
-}
-
-///
-/// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.
-///
-/// Formats:
-/// <oid>
-/// <oid>_<fork name>
-/// <oid>.<segment number>
-/// <oid>_<fork name>.<segment number>
-///
-/// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
-///
-pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
-    lazy_static! {
-        static ref RELFILE_RE: Regex =
-            Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
-    }
-    let caps = RELFILE_RE
-        .captures(fname)
-        .ok_or(FilePathError::InvalidFileName)?;
-
-    let relnode_str = caps.name("relnode").unwrap().as_str();
-    let relnode = relnode_str.parse::<u32>()?;
-
-    let forkname = caps.name("forkname").map(|f| f.as_str());
-    let forknum = forkname_to_number(forkname)?;
-
-    let segno_match = caps.name("segno");
-    let segno = if segno_match.is_none() {
-        0
-    } else {
-        segno_match.unwrap().as_str().parse::<u32>()?
-    };
-
-    Ok((relnode, forknum, segno))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_parse_valid_relfilenames() {
-        assert_eq!(parse_relfilename("1234"), Ok((1234, 0, 0)));
-        assert_eq!(parse_relfilename("1234_fsm"), Ok((1234, 1, 0)));
-        assert_eq!(parse_relfilename("1234_vm"), Ok((1234, 2, 0)));
-        assert_eq!(parse_relfilename("1234_init"), Ok((1234, 3, 0)));
-
-        assert_eq!(parse_relfilename("1234.12"), Ok((1234, 0, 12)));
-        assert_eq!(parse_relfilename("1234_fsm.12"), Ok((1234, 1, 12)));
-        assert_eq!(parse_relfilename("1234_vm.12"), Ok((1234, 2, 12)));
-        assert_eq!(parse_relfilename("1234_init.12"), Ok((1234, 3, 12)));
-
-        // relfilenode is unsigned, so it can go up to 2^32-1
-        assert_eq!(parse_relfilename("3147483648"), Ok((3147483648, 0, 0)));
-    }
-
-    #[test]
-    fn test_parse_invalid_relfilenames() {
-        assert_eq!(
-            parse_relfilename("foo"),
-            Err(FilePathError::InvalidFileName)
-        );
-        assert_eq!(
-            parse_relfilename("1.2.3"),
-            Err(FilePathError::InvalidFileName)
-        );
-        assert_eq!(
-            parse_relfilename("1234_invalid"),
-            Err(FilePathError::InvalidForkName)
-        );
-        assert_eq!(
-            parse_relfilename("1234_"),
-            Err(FilePathError::InvalidFileName)
-        );
-
-        // too large for u32
-        assert_eq!(
-            parse_relfilename("12345678901"),
-            Err(FilePathError::InvalidFileName)
-        );
-        assert_eq!(
-            parse_relfilename("-1234"),
-            Err(FilePathError::InvalidFileName)
-        );
-    }
-
-    #[test]
-    fn test_parse_weird_relfilenames() {
-        // we accept 0 for the relfilenode, but PostgreSQL should never do that.
-        assert_eq!(parse_relfilename("0"), Ok((0, 0, 0)));
-
-        // PostgreSQL has a limit of 2^32-2 blocks in a table. With 8k block size and
-        // 1 GB segments, the max segment number is 32767. But we accept larger values
-        // currently.
-        assert_eq!(parse_relfilename("1.123456"), Ok((1, 0, 123456)));
-    }
-}
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -1,92 +0,0 @@
-## Zenith test runner
-
-This directory contains integration tests.
-
-Prerequisites:
- Python 3.6 or later
- Python packages: pytest, psycopg2
-    - pytest 6.0 is required.
-    - __NOTE: `apt install` on Debian/Ubuntu won't work.__
-      They ship a much older version of pytest (and sometimes rename it to
-      `pytest-3`.)
-    - Install using something like this:
-        - `pip3 install pytest psycopg2` (Debian or Ubuntu)
- Zenith and Postgres binaries
-    - See the root README.md for build directions
-    - Tests can be run from the git tree; or see the environment variables
-      below to run from other directories.
- The zenith git repo, including the postgres submodule
-  (for some tests, e.g. pg_regress)
-
-### Test Organization
-
-The tests are divided into a few batches, such that each batch takes roughly
-the same amount of time. The batches can be run in parallel, to minimize total
-runtime. Currently, there are only two batches:
-
- test_batch_pg_regress: Runs PostgreSQL regression tests
- test_others: All other tests
-
-### Running the tests
-
-Because pytest will search all subdirectories for tests, it's easiest to
-run the tests from within the `test_runner` directory.
-
-Test state (postgres data, pageserver state, and log files) will
-be stored under a directory `test_output`.
-
-You can run all the tests with:
-
-`pytest`
-
-If you want to run all the tests in a particular file:
-
-`pytest test_pgbench.py`
-
-If you want to run all tests that have the string "bench" in their names:
-
-`pytest -k bench`
-
-Useful environment variables:
-
-`ZENITH_BIN`: The directory where zenith binaries can be found.
-`POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found.
-`TEST_OUTPUT`: Set the directory where test state and test output files
-should go.
-`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
-
-Let stdout and stderr go to the terminal instead of capturing them:
-`pytest -s ...`
-(Note many tests capture subprocess outputs separately, so this may not
-show much.)
-
-Exit after the first test failure:
-`pytest -x ...`
-(there are many more pytest options; run `pytest -h` to see them.)
-
-
-### Building new tests
-
-The tests make heavy use of pytest fixtures. You can read about how they work here: https://docs.pytest.org/en/stable/fixture.html
-
-Essentially, this means that each time you see a fixture named as an input parameter, the function with that name will be run and passed as a parameter to the function.
-
-So this code:
-```
-def test_something(zenith_cli, pg_bin):
-    pass
-```
-
-... will run the fixtures called `zenith_cli` and `pg_bin` and deliver those results to the test function.
-
-Fixtures can't be imported using the normal python syntax. Instead, use this:
-```
-pytest_plugins = ("fixtures.something")
-```
-That will make all the fixtures in the `fixtures/something.py` file available.
-
-Anything that's likely to be used in multiple tests should be built into a fixture.
-
-Note that fixtures can clean up after themselves if they use the `yield` syntax.
-Cleanup will happen even if the test fails (raises an unhandled exception).
-Python destructors, e.g. `__del__()` aren't recommended for cleanup.
--- a/test_runner/batch_others/test_branch_behind.py
+++ b/test_runner/batch_others/test_branch_behind.py
@@ -1,67 +0,0 @@
-import pytest
-import getpass
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-#
-# Create a couple of branches off the main branch, at a historical point in time.
-#
-def test_branch_behind(zenith_cli, pageserver, postgres, pg_bin):
-    # Create a branch for us, off the 'empty' branch
-    zenith_cli.run(["branch", "test_branch_behind", "empty"]);
-
-    pgmain = postgres.create_start('test_branch_behind')
-    print("postgres is running on 'test_branch_behind' branch")
-
-    main_pg_conn = psycopg2.connect(pgmain.connstr());
-    main_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    main_cur = main_pg_conn.cursor()
-
-    # Create table, and insert the first 100 rows
-    main_cur.execute('CREATE TABLE foo (t text)');
-    main_cur.execute("INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g");
-    main_cur.execute('SELECT pg_current_wal_insert_lsn()');
-    lsn_a = main_cur.fetchone()[0]
-    print('LSN after 100 rows: ' + lsn_a)
-
-    # Insert some more rows. (This generates enough WAL to fill a few segments.)
-    main_cur.execute("INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g");
-    main_cur.execute('SELECT pg_current_wal_insert_lsn()');
-    lsn_b = main_cur.fetchone()[0]
-    print('LSN after 100100 rows: ' + lsn_b)
-
-    # Branch at the point where only 100 rows were inserted
-    zenith_cli.run(["branch", "test_branch_behind_hundred", "test_branch_behind@"+lsn_a]);
-
-    # Insert many more rows. This generates enough WAL to fill a few segments.
-    main_cur.execute("INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g");
-    main_cur.execute('SELECT pg_current_wal_insert_lsn()');
-
-    main_cur.execute('SELECT pg_current_wal_insert_lsn()');
-    lsn_c = main_cur.fetchone()[0]
-    print('LSN after 200100 rows: ' + lsn_c)
-
-    # Branch at the point where only 100100 rows were inserted
-    zenith_cli.run(["branch", "test_branch_behind_more", "test_branch_behind@"+lsn_b]);
-
-    pg_hundred = postgres.create_start("test_branch_behind_hundred")
-    pg_more = postgres.create_start("test_branch_behind_more")
-
-    # On the 'hundred' branch, we should see only 100 rows
-    hundred_pg_conn = psycopg2.connect(pg_hundred.connstr())
-    hundred_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    hundred_cur = hundred_pg_conn.cursor()
-    hundred_cur.execute('SELECT count(*) FROM foo');
-    assert(hundred_cur.fetchone()[0] == 100);
-
-    # On the 'more' branch, we should see 100100 rows
-    more_pg_conn = psycopg2.connect(pg_more.connstr())
-    more_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    more_cur = more_pg_conn.cursor()
-    more_cur.execute('SELECT count(*) FROM foo');
-    assert(more_cur.fetchone()[0] == 100100);
-
-    # All the rows are visible on the main branch
-    main_cur.execute('SELECT count(*) FROM foo');
-    assert(main_cur.fetchone()[0] == 200100);
--- a/test_runner/batch_others/test_config.py
+++ b/test_runner/batch_others/test_config.py
@@ -1,30 +0,0 @@
-import pytest
-import os
-import getpass
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-
-#
-# Test starting Postgres with custom options
-#
-def test_config(zenith_cli, pageserver, postgres, pg_bin):
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_config", "empty"]);
-
-    # change config
-    pg = postgres.create_start('test_config', ['log_min_messages=debug1'])
-    print('postgres is running on test_config branch')
-
-    pg_conn = psycopg2.connect(pg.connstr())
-    pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur = pg_conn.cursor()
-
-    #check that config change was applied
-    cur.execute('SELECT name, setting from pg_settings WHERE source!=%s and source!=%s', ("default","override",))
-    for record in cur:
-        if record[0] == 'log_min_messages':
-            assert(record[1] == 'debug1')
-
-    pg_conn.close()
--- a/test_runner/batch_others/test_createdb.py
+++ b/test_runner/batch_others/test_createdb.py
@@ -1,37 +0,0 @@
-import pytest
-import getpass
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-#
-# Test CREATE DATABASE when there have been relmapper changes
-#
-def test_createdb(zenith_cli, pageserver, postgres, pg_bin):
-    zenith_cli.run(["branch", "test_createdb", "empty"]);
-
-    pg = postgres.create_start('test_createdb')
-    print("postgres is running on 'test_createdb' branch")
-
-    conn = psycopg2.connect(pg.connstr());
-    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur = conn.cursor()
-
-    # Cause a 'relmapper' change in the original branch
-    cur.execute('VACUUM FULL pg_class');
-
-    cur.execute('CREATE DATABASE foodb');
-
-    cur.execute('SELECT pg_current_wal_insert_lsn()');
-    lsn = cur.fetchone()[0]
-
-    conn.close();
-
-    # Create a branch
-    zenith_cli.run(["branch", "test_createdb2", "test_createdb@"+lsn]);
-
-    pg2 = postgres.create_start('test_createdb2')
-
-    # Test that you can connect to the new database on both branches
-    conn = psycopg2.connect(pg.connstr('foodb'));
-    conn2 = psycopg2.connect(pg2.connstr('foodb'));
--- a/test_runner/batch_others/test_pageserver_api.py
+++ b/test_runner/batch_others/test_pageserver_api.py
@@ -1,54 +0,0 @@
-import pytest
-import psycopg2
-import getpass
-import json
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-def test_status(pageserver):
-    pg_conn = psycopg2.connect(pageserver.connstr())
-    pg_conn.autocommit = True
-    cur = pg_conn.cursor()
-    cur.execute('status;')
-    assert cur.fetchone() == ('hello world',)
-    pg_conn.close()
-
-def test_branch_list(pageserver, zenith_cli):
-
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_branch_list_main", "empty"]);
-
-    page_server_conn = psycopg2.connect(pageserver.connstr())
-    page_server_conn.autocommit = True
-    page_server_cur = page_server_conn.cursor()
-
-    page_server_cur.execute('branch_list;')
-    branches = json.loads(page_server_cur.fetchone()[0])
-    # Filter out branches created by other tests
-    branches = [x for x in branches if x['name'].startswith('test_branch_list')]
-
-    assert len(branches) == 1
-    assert branches[0]['name'] == 'test_branch_list_main'
-    assert 'timeline_id' in branches[0]
-    assert 'latest_valid_lsn' in branches[0]
-    assert 'ancestor_id' in branches[0]
-    assert 'ancestor_lsn' in branches[0]
-
-    # Create another branch, and start Postgres on it
-    zenith_cli.run(['branch', 'test_branch_list_experimental', 'test_branch_list_main'])
-    zenith_cli.run(['pg', 'create', 'test_branch_list_experimental'])
-
-    page_server_cur.execute('branch_list;')
-    new_branches = json.loads(page_server_cur.fetchone()[0])
-    # Filter out branches created by other tests
-    new_branches = [x for x in new_branches if x['name'].startswith('test_branch_list')]
-    assert len(new_branches) == 2
-    new_branches.sort(key=lambda k: k['name'])
-
-    assert new_branches[0]['name'] == 'test_branch_list_experimental'
-    assert new_branches[0]['timeline_id'] != branches[0]['timeline_id']
-
-    # TODO: do the LSNs have to match here?
-    assert new_branches[1] == branches[0]
-
-    page_server_conn.close()
--- a/test_runner/batch_others/test_pgbench.py
+++ b/test_runner/batch_others/test_pgbench.py
@@ -1,17 +0,0 @@
-import pytest
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-
-def test_pgbench(pageserver, postgres, pg_bin, zenith_cli):
-
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_pgbench", "empty"]);
-
-    pg = postgres.create_start('test_pgbench')
-    print("postgres is running on 'test_pgbench' branch")
-
-    connstr = pg.connstr();
-
-    pg_bin.run_capture(['pgbench', '-i', connstr])
-    pg_bin.run_capture(['pgbench'] + '-c 10 -T 5 -P 1 -M prepared'.split() + [connstr])
--- a/test_runner/batch_others/test_twophase.py
+++ b/test_runner/batch_others/test_twophase.py
@@ -1,58 +0,0 @@
-#
-# Test branching, when a transaction is in prepared state
-#
-import pytest
-import getpass
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-def test_twophase(zenith_cli, pageserver, postgres, pg_bin):
-    zenith_cli.run(["branch", "test_twophase", "empty"]);
-
-    pg = postgres.create_start('test_twophase', ['max_prepared_transactions=5'])
-    print("postgres is running on 'test_twophase' branch")
-
-    conn = psycopg2.connect(pg.connstr());
-    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur = conn.cursor()
-
-    cur.execute('CREATE TABLE foo (t text)');
-
-    # Prepare a transaction that will insert a row
-    cur.execute('BEGIN');
-    cur.execute("INSERT INTO foo VALUES ('one')");
-    cur.execute("PREPARE TRANSACTION 'insert_one'");
-
-    # Prepare another transaction that will insert a row
-    cur.execute('BEGIN');
-    cur.execute("INSERT INTO foo VALUES ('two')");
-    cur.execute("PREPARE TRANSACTION 'insert_two'");
-
-    cur.execute('BEGIN');
-    cur.execute("INSERT INTO foo VALUES ('three')");
-    cur.execute("PREPARE TRANSACTION 'insert_three'");
-    cur.execute("COMMIT PREPARED 'insert_three'");
-
-    cur.execute('SELECT pg_current_wal_insert_lsn()');
-    lsn = cur.fetchone()[0]
-
-    # Create a branch with the transaction in prepared state
-    zenith_cli.run(["branch", "test_twophase_prepared", "test_twophase@"+lsn]);
-
-    pg2 = postgres.create_start('test_twophase_prepared', ['max_prepared_transactions=5'])
-    conn2 = psycopg2.connect(pg2.connstr());
-    conn2.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur2 = conn2.cursor()
-
-    # On the new branch, commit one of the prepared transactions, abort the other one.
-    cur2.execute("COMMIT PREPARED 'insert_one'");
-    cur2.execute("ROLLBACK PREPARED 'insert_two'");
-
-    cur2.execute('SELECT * FROM foo');
-    assert(cur2.fetchall() == [('one',),('three',)]);
-
-    # Neither insert is visible on the original branch, the transactions are still
-    # in prepared state there.
-    cur.execute('SELECT * FROM foo');
-    assert(cur.fetchall() == [('three',)]);
--- a/test_runner/batch_others/test_zenith_cli.py
+++ b/test_runner/batch_others/test_zenith_cli.py
@@ -1,49 +0,0 @@
-import pytest
-import psycopg2
-import json
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-def helper_compare_branch_list(page_server_cur, zenith_cli):
-    """
-    Compare branches list returned by CLI and directly via API.
-    Filters out branches created by other tests.
-    """
-
-    page_server_cur.execute('branch_list;')
-    branches_api = sorted(map(lambda b: b['name'], json.loads(page_server_cur.fetchone()[0])))
-    branches_api = [b for b in branches_api if b.startswith('test_cli_') or b in ('empty', 'main')]
-
-    res = zenith_cli.run(["branch"]);
-    assert(res.stderr == '')
-    branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
-    branches_cli = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
-
-    assert(branches_api == branches_cli)
-
-def test_cli_branch_list(pageserver, zenith_cli):
-
-    page_server_conn = psycopg2.connect(pageserver.connstr())
-    page_server_conn.autocommit = True
-    page_server_cur = page_server_conn.cursor()
-
-    # Initial sanity check
-    helper_compare_branch_list(page_server_cur, zenith_cli)
-
-    # Create a branch for us
-    res = zenith_cli.run(["branch", "test_cli_branch_list_main", "main"]);
-    assert(res.stderr == '')
-    helper_compare_branch_list(page_server_cur, zenith_cli)
-
-    # Create a nested branch
-    res = zenith_cli.run(["branch", "test_cli_branch_list_nested", "test_cli_branch_list_main"]);
-    assert(res.stderr == '')
-    helper_compare_branch_list(page_server_cur, zenith_cli)
-
-    # Check that all new branches are visible via CLI
-    res = zenith_cli.run(["branch"]);
-    assert(res.stderr == '')
-    branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
-
-    assert('test_cli_branch_list_main' in branches_cli)
-    assert('test_cli_branch_list_nested' in branches_cli)
--- a/test_runner/batch_pg_regress/test_pg_regress.py
+++ b/test_runner/batch_pg_regress/test_pg_regress.py
@@ -1,61 +0,0 @@
-import pytest
-from fixtures.utils import mkdir_if_needed
-import getpass
-import os
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-# FIXME: put host + port in a fixture
-HOST = 'localhost'
-PORT = 55432
-
-
-def test_pg_regress(pageserver, postgres, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, base_dir, capsys):
-
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_pg_regress", "empty"]);
-
-    # Connect to postgres and create a database called "regression".
-    pg = postgres.create_start('test_pg_regress')
-    pg_conn = psycopg2.connect(pg.connstr())
-    pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur = pg_conn.cursor()
-    cur.execute('CREATE DATABASE regression')
-    pg_conn.close()
-
-    # Create some local directories for pg_regress to run in.
-    runpath = os.path.join(test_output_dir, 'regress')
-    mkdir_if_needed(runpath)
-    mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
-
-    # Compute all the file locations that pg_regress will need.
-    build_path = os.path.join(
-        pg_distrib_dir, 'build/src/test/regress')
-    src_path = os.path.join(
-        base_dir, 'vendor/postgres/src/test/regress')
-    bindir = os.path.join(pg_distrib_dir, 'bin')
-    schedule = os.path.join(src_path, 'parallel_schedule')
-    pg_regress = os.path.join(build_path, 'pg_regress')
-
-    pg_regress_command = [
-        pg_regress,
-        '--bindir=""',
-        '--use-existing',
-        '--bindir={}'.format(bindir),
-        '--dlpath={}'.format(build_path),
-        '--schedule={}'.format(schedule),
-        '--inputdir={}'.format(src_path),
-    ]
-
-    env = {
-        'PGPORT': str(pg.port),
-        'PGUSER': pg.username,
-        'PGHOST': pg.host,
-    }
-
-    # Run the command.
-    # We don't capture the output. It's not too chatty, and it always
-    # logs the exact same data to `regression.out` anyway.
-    with capsys.disabled():
-        pg_bin.run(pg_regress_command, env=env, cwd=runpath)
--- a/test_runner/batch_pg_regress/test_zehith_regress.py
+++ b/test_runner/batch_pg_regress/test_zehith_regress.py
@@ -1,62 +0,0 @@
-import pytest
-from fixtures.utils import mkdir_if_needed
-import getpass
-import os
-import psycopg2
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-# FIXME: put host + port in a fixture
-HOST = 'localhost'
-PORT = 55432
-
-
-def test_zenith_regress(pageserver, postgres, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, base_dir, capsys):
-
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_zenith_regress", "empty"]);
-
-    # Connect to postgres and create a database called "regression".
-    pg = postgres.create_start('test_zenith_regress')
-    pg_conn = psycopg2.connect(pg.connstr())
-    pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cur = pg_conn.cursor()
-    cur.execute('CREATE DATABASE regression')
-    pg_conn.close()
-
-    # Create some local directories for pg_regress to run in.
-    runpath = os.path.join(test_output_dir, 'regress')
-    mkdir_if_needed(runpath)
-    mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
-
-    # Compute all the file locations that pg_regress will need.
-    # This test runs zenith specific tests
-    build_path = os.path.join(
-        pg_distrib_dir, 'build/src/test/regress')
-    src_path = os.path.join(
-        base_dir, 'test_runner/zenith_regress')
-    bindir = os.path.join(pg_distrib_dir, 'bin')
-    schedule = os.path.join(src_path, 'parallel_schedule')
-    pg_regress = os.path.join(build_path, 'pg_regress')
-
-    pg_regress_command = [
-        pg_regress,
-        '--use-existing',
-        '--bindir={}'.format(bindir),
-        '--dlpath={}'.format(build_path),
-        '--schedule={}'.format(schedule),
-        '--inputdir={}'.format(src_path),
-    ]
-
-    print(pg_regress_command)
-    env = {
-        'PGPORT': str(pg.port),
-        'PGUSER': pg.username,
-        'PGHOST': pg.host,
-    }
-
-    # Run the command.
-    # We don't capture the output. It's not too chatty, and it always
-    # logs the exact same data to `regression.out` anyway.
-    with capsys.disabled():
-        pg_bin.run(pg_regress_command, env=env, cwd=runpath)
--- a/test_runner/conftest.py
+++ b/test_runner/conftest.py
@@ -1 +0,0 @@
-pytest_plugins = ("fixtures.zenith_fixtures")
--- a/test_runner/fixtures/init.py
+++ b/test_runner/fixtures/init.py
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -1,53 +0,0 @@
-
-import os
-import subprocess
-
-def get_self_dir():
-    """ Get the path to the directory where this script lives. """
-    return os.path.dirname(os.path.abspath(__file__))
-
-
-def mkdir_if_needed(path):
-    """ Create a directory if it doesn't already exist
-
-    Note this won't try to create intermediate directories.
-    """
-    if os.path.exists(path):
-        assert os.path.isdir(path)
-        return
-    os.mkdir(path)
-
-
-def subprocess_capture(capture_dir, cmd, **kwargs):
-    """ Run a process and capture its output
-
-    Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
-    where "cmd" is the name of the program and NNN is an incrementing
-    counter.
-
-    If those files already exist, we will overwrite them.
-    """
-    assert type(cmd) is list
-    base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
-    basepath = os.path.join(capture_dir, base)
-    stdout_filename = basepath + '.stdout'
-    stderr_filename = basepath + '.stderr'
-
-    with open(stdout_filename, 'w') as stdout_f:
-        with open(stderr_filename, 'w') as stderr_f:
-            print('(capturing output to "{}.stdout")'.format(base))
-            subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
-
-
-_global_counter = 0
-
-
-def global_counter():
-    """ A really dumb global counter.
-
-    This is useful for giving output files a unique number, so if we run the
-    same command multiple times we can keep their output separate.
-    """
-    global _global_counter
-    _global_counter += 1
-    return _global_counter
--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -1,357 +0,0 @@
-import getpass
-import os
-import psycopg2
-import pytest
-import shutil
-import subprocess
-import sys
-from .utils import (get_self_dir, mkdir_if_needed,
-                    subprocess_capture, global_counter)
-
-"""
-This file contains pytest fixtures. A fixture is a test resource that can be
-summoned by placing its name in the test's arguments.
-
-A fixture is created with the decorator @zenfixture, which is a wrapper around
-the standard pytest.fixture with some extra behavior.
-
-There are several environment variables that can control the running of tests:
-ZENITH_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
-
-To use fixtures in a test file, add this line of code:
-
-    pytest_plugins = ("fixtures.zenith_fixtures")
-
-Don't import functions from this file, or pytest will emit warnings. Instead
-put directly-importable functions into utils.py or another separate file.
-"""
-
-DEFAULT_OUTPUT_DIR = 'test_output'
-DEFAULT_POSTGRES_DIR = 'tmp_install'
-
-
-def determine_scope(fixture_name, config):
-    return 'session'
-
-
-def zenfixture(func):
-    """ This is a python decorator for fixtures with a flexible scope.
-
-    By default every test function will set up and tear down a new
-    database. In pytest, this is called fixtures "function" scope.
-
-    If the environment variable TEST_SHARED_FIXTURES is set, then all
-    tests will share the same database. State, logs, etc. will be
-    stored in a directory called "shared".
-
-    """
-    if os.environ.get('TEST_SHARED_FIXTURES') is None:
-        scope = 'function'
-    else:
-        scope = 'session'
-    return pytest.fixture(func, scope=scope)
-
-
-@pytest.fixture(autouse=True, scope='session')
-def safety_check():
-    """ Ensure that no unwanted daemons are running before we start testing. """
-    # does not use -c as it is not supported on macOS
-    cmd = ['pgrep', 'pageserver|postgres|wal_acceptor']
-    result = subprocess.run(cmd, stdout=subprocess.DEVNULL)
-    if result.returncode == 0:
-        # returncode of 0 means it found something.
-        # This is bad; we don't want any of those processes polluting the
-        # result of the test.
-        raise Exception('found interfering processes running')
-
-
-class ZenithCli:
-    """ An object representing the CLI binary named "zenith".
-
-    We also store an environment that will tell the CLI to operate
-    on a particular ZENITH_REPO_DIR.
-    """
-
-    def __init__(self, binpath, repo_dir, pg_distrib_dir):
-        assert os.path.isdir(binpath)
-        self.binpath = binpath
-        self.bin_zenith = os.path.join(binpath, 'zenith')
-        self.env = os.environ.copy()
-        self.env['ZENITH_REPO_DIR'] = repo_dir
-        self.env['POSTGRES_DISTRIB_DIR'] = pg_distrib_dir
-
-    def run(self, arguments):
-        """ Run "zenith" with the specified arguments.
-
-        arguments must be in list form, e.g. ['pg', 'create']
-
-        Return both stdout and stderr, which can be accessed as
-
-        result = zenith_cli.run(...)
-        assert(result.stderr == "")
-        print(result.stdout)
-
-        """
-        assert type(arguments) == list
-        args = [self.bin_zenith] + arguments
-        print('Running command "{}"'.format(' '.join(args)))
-        return subprocess.run(args, env=self.env, check=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-
-@zenfixture
-def zenith_cli(zenith_binpath, repo_dir, pg_distrib_dir):
-    return ZenithCli(zenith_binpath, repo_dir, pg_distrib_dir)
-
-
-class ZenithPageserver:
-    """ An object representing a pageserver; `running` tracks whether it was started. """
-
-    def __init__(self, zenith_cli):
-        self.zenith_cli = zenith_cli
-        self.running = False
-
-    # Initialize the repository, i.e. run "zenith init"
-    def init(self):
-        self.zenith_cli.run(['init'])
-
-    # Start the page server, i.e. run "zenith start", and mark it as running
-    def start(self):
-        self.zenith_cli.run(['start'])
-        self.running = True
-
-    # Stop the page server, i.e. run "zenith stop", and mark it as no longer running
-    def stop(self):
-        self.zenith_cli.run(['stop'])
-        self.running = False
-
-    # The page server speaks the Postgres FE/BE protocol, so you can connect
-    # to it with any Postgres client, and run special commands. This function
-    # returns a libpq connection string for connecting to it.
-    def connstr(self):
-        username = getpass.getuser()
-        conn_str = 'host={} port={} dbname=postgres user={}'.format(
-            'localhost', 64000, username)
-        return conn_str
-
-# The 'pageserver' fixture provides a Page Server that's up and running.
-#
-# If TEST_SHARED_FIXTURES is set, the Page Server instance is shared by all
-# the tests. To avoid clashing with other tests, don't use the 'main' branch in
-# the tests directly. Instead, create a branch off the 'empty' branch and use
-# that.
-#
-# By convention, the test branches are named after the tests. For example,
-# a test called 'test_foo' would create and use branches with the 'test_foo' prefix.
-@zenfixture
-def pageserver(zenith_cli):
-    ps = ZenithPageserver(zenith_cli)
-    ps.init()
-    ps.start()
-    # For convenience in tests, create a branch from the freshly-initialized cluster.
-    zenith_cli.run(["branch", "empty", "main"]);
-    yield ps
-    # After the yield comes any cleanup code we need.
-    print('Starting pageserver cleanup')
-    ps.stop()
-
-class Postgres:
-    """ An object representing a running postgres daemon. """
-
-    def __init__(self, zenith_cli, repo_dir, instance_num):
-        self.zenith_cli = zenith_cli
-        self.instance_num = instance_num
-        self.running = False
-        self.username = getpass.getuser()
-        self.host = 'localhost'
-        self.port = 55431 + instance_num
-        self.repo_dir = repo_dir
-        self.branch = None
-        # path to conf is <repo_dir>/pgdatadirs/<branch_name>/postgresql.conf
-
-    def create_start(self, branch, config_lines=None):
-        """ create the pg data directory, and start the server """
-        self.zenith_cli.run(['pg', 'create', branch])
-        self.branch = branch
-        if config_lines is None:
-            config_lines = []
-        self.config(config_lines)
-        self.zenith_cli.run(['pg', 'start', branch])
-        self.running = True
-        return
-
-    #lines should be an array of valid postgresql.conf rows
-    def config(self, lines):
-        filename = 'pgdatadirs/{}/postgresql.conf'.format(self.branch)
-        config_name = os.path.join(self.repo_dir, filename)
-        with open(config_name, 'a') as conf:
-            for line in lines:
-                conf.write(line)
-                conf.write('\n')
-
-    def stop(self):
-        if self.running:
-            self.zenith_cli.run(['pg', 'stop', self.branch])
-
-    # Return a libpq connection string to connect to the Postgres instance
-    def connstr(self, dbname='postgres'):
-        conn_str = 'host={} port={} dbname={} user={}'.format(
-            self.host, self.port, dbname, self.username)
-        return conn_str
-
-class PostgresFactory:
-    """ An object representing multiple running postgres daemons. """
-    def __init__(self, zenith_cli, repo_dir):
-        self.zenith_cli = zenith_cli
-        self.host = 'localhost'
-        self.repo_dir = repo_dir
-        self.num_instances = 0
-        self.instances = []
-
-    def create_start(self, branch="main", config_lines=None):
-        pg = Postgres(self.zenith_cli, self.repo_dir, self.num_instances + 1)
-        self.num_instances += 1
-        self.instances.append(pg)
-        pg.create_start(branch, config_lines)
-        return pg
-
-    def stop_all(self):
-        for pg in self.instances:
-            pg.stop()
-
-@zenfixture
-def postgres(zenith_cli, repo_dir):
-    pgfactory = PostgresFactory(zenith_cli, repo_dir)
-    yield pgfactory
-    # After the yield comes any cleanup code we need.
-    print('Starting postgres cleanup')
-    pgfactory.stop_all()
-
-
-class PgBin:
-    """ A helper class for executing postgres binaries """
-
-    def __init__(self, log_dir, pg_distrib_dir):
-        self.log_dir = log_dir
-        self.pg_install_path = pg_distrib_dir
-        self.pg_bin_path = os.path.join(self.pg_install_path, 'bin')
-        self.env = os.environ.copy()
-        self.env['LD_LIBRARY_PATH'] = os.path.join(self.pg_install_path, 'lib')
-
-    def _fixpath(self, command):
-        if not '/' in command[0]:
-            command[0] = os.path.join(self.pg_bin_path, command[0])
-
-    def _build_env(self, env_add):
-        if env_add is None:
-            return self.env
-        env = self.env.copy()
-        env.update(env_add)
-        return env
-
-    def run(self, command, env=None, cwd=None):
-        """ Run one of the postgres binaries.
-
-        The command should be in list form, e.g. ['pgbench', '-p', '55432']
-
-        All the necessary environment variables will be set.
-
-        If the first argument (the command name) doesn't include a path (no '/'
-        characters present), then it will be edited to include the correct path.
-
-        If you want stdout/stderr captured to files, use `run_capture` instead.
-
-        """
-        self._fixpath(command)
-        print('Running command "{}"'.format(' '.join(command)))
-        env = self._build_env(env)
-        subprocess.run(command, env=env, cwd=cwd, check=True)
-
-    def run_capture(self, command, env=None, cwd=None):
-        """ Run one of the postgres binaries, with stderr and stdout redirected to a file.
-
-        This is just like `run`, but for chatty programs.
-        """
-        self._fixpath(command)
-        print('Running command "{}"'.format(' '.join(command)))
-        env = self._build_env(env)
-        subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True)
-
-
-@zenfixture
-def pg_bin(test_output_dir, pg_distrib_dir):
-    return PgBin(test_output_dir, pg_distrib_dir)
-
-
-@zenfixture
-def base_dir():
-    """ find the base directory (currently this is the git root) """
-    base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..'))
-    print('base_dir is', base_dir)
-    return base_dir
-
-
-@zenfixture
-def top_output_dir(base_dir):
-    """ Compute the top-level directory for all tests. """
-    env_test_output = os.environ.get('TEST_OUTPUT')
-    if env_test_output is not None:
-        output_dir = env_test_output
-    else:
-        output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
-    mkdir_if_needed(output_dir)
-    return output_dir
-
-
-@zenfixture
-def test_output_dir(request, top_output_dir):
-    """ Compute the working directory for an individual test. """
-    if os.environ.get('TEST_SHARED_FIXTURES') is None:
-        # one directory per test
-        test_name = request.node.name
-    else:
-        # We're running shared fixtures. Share a single directory.
-        test_name = 'shared'
-
-    test_output_dir = os.path.join(top_output_dir, test_name)
-    print('test_output_dir is', test_output_dir)
-    shutil.rmtree(test_output_dir, ignore_errors=True)
-    mkdir_if_needed(test_output_dir)
-    return test_output_dir
-
-
-@zenfixture
-def repo_dir(request, test_output_dir):
-    """ Compute the test repo_dir
-
-    "repo_dir" is the place where all of the pageserver files will go.
-    It doesn't have anything to do with the git repo.
-    """
-    repo_dir = os.path.join(test_output_dir, 'repo')
-    return repo_dir
-
-
-@zenfixture
-def zenith_binpath(base_dir):
-    """ find the zenith binaries """
-    env_zenith_bin = os.environ.get('ZENITH_BIN')
-    if env_zenith_bin:
-        zenith_dir = env_zenith_bin
-    else:
-        zenith_dir = os.path.join(base_dir, 'target/debug')
-    if not os.path.exists(os.path.join(zenith_dir, 'pageserver')):
-        raise Exception('zenith binaries not found at "{}"'.format(zenith_dir))
-    return zenith_dir
-
-
-@zenfixture
-def pg_distrib_dir(base_dir):
-    """ find the postgres install """
-    env_postgres_bin = os.environ.get('POSTGRES_DISTRIB_DIR')
-    if env_postgres_bin:
-        pg_dir = env_postgres_bin
-    else:
-        pg_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
-    print('postgres dir is', pg_dir)
-    if not os.path.exists(os.path.join(pg_dir, 'bin/postgres')):
-        raise Exception('postgres not found at "{}"'.format(pg_dir))
-    return pg_dir
--- a/test_runner/pytest.ini
+++ b/test_runner/pytest.ini
@@ -1,2 +0,0 @@
-[pytest]
-minversion = 6.0
--- a/test_runner/test_broken.py
+++ b/test_runner/test_broken.py
@@ -1,33 +0,0 @@
-import pytest
-import os
-
-pytest_plugins = ("fixtures.zenith_fixtures")
-
-"""
-
-Use this test to see what happens when tests fail.
-
-We should be able to clean up after ourselves, including stopping any
-postgres or pageserver processes.
-
-Set the environment variable RUN_BROKEN to see this test run (and fail,
-and hopefully not leave any server processes behind).
-
-"""
-
-
-run_broken = pytest.mark.skipif(
-    os.environ.get('RUN_BROKEN') == None,
-    reason="only used for testing the fixtures"
-)
-
-@run_broken
-def test_broken(zenith_cli, pageserver, postgres, pg_bin):
-    # Create a branch for us
-    zenith_cli.run(["branch", "test_broken", "empty"]);
-
-    pg = postgres.create_start("test_broken")
-    print('postgres is running')
-
-    print('THIS NEXT COMMAND WILL FAIL:')
-    pg_bin.run('pgbench -i_am_a_broken_test'.split())
--- a/test_runner/zenith_regress/.gitignore
+++ b/test_runner/zenith_regress/.gitignore
@@ -1,11 +0,0 @@
-# Local binaries
-/pg_regress
-
-# Generated subdirectories
-/tmp_check/
-/results/
-/log/
-
-# Note: regression.* are only left behind on a failure; that's why they're not ignored
-#/regression.diffs
-#/regression.out
--- a/test_runner/zenith_regress/README.md
+++ b/test_runner/zenith_regress/README.md
@@ -1,11 +0,0 @@
-To add a new SQL test
-
- add the SQL script to run as zenith_regress/sql/testname.sql
- add the expected output as zenith_regress/expected/testname.out
- add testname to both the parallel_schedule and serial_schedule files*
-
-That's it.
-For more complex tests, see the PostgreSQL regression tests. These work basically the same way.
-
-*This was changed recently in upstream PostgreSQL: there is no longer a separate serial_schedule.
-Someday we'll catch up with these changes.
--- a/test_runner/zenith_regress/expected/.gitignore
+++ b/test_runner/zenith_regress/expected/.gitignore
@@ -1,9 +0,0 @@
-/constraints.out
-/copy.out
-/create_function_1.out
-/create_function_2.out
-/largeobject.out
-/largeobject_1.out
-/misc.out
-/security_label.out
-/tablespace.out
--- a/test_runner/zenith_regress/expected/zenith-cid.out
+++ b/test_runner/zenith_regress/expected/zenith-cid.out
@@ -1,34 +0,0 @@
-BEGIN;
-SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-CREATE TABLE cursor (a int);
-INSERT INTO cursor VALUES (1);
-DECLARE c1 NO SCROLL CURSOR FOR SELECT * FROM cursor FOR UPDATE;
-UPDATE cursor SET a = 2;
-FETCH ALL FROM c1;
- a 
---
-(0 rows)
-
-COMMIT;
-DROP TABLE cursor;
-create table to_be_evicted(x bigint);
-begin;
-insert into to_be_evicted values (1);
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-insert into to_be_evicted select x*10 from to_be_evicted;
-select sum(x) from to_be_evicted;
-     sum     
-------------
- 25937424601
-(1 row)
-
-end;
-drop table to_be_evicted;
--- a/test_runner/zenith_regress/expected/zenith-clog.out
+++ b/test_runner/zenith_regress/expected/zenith-clog.out
@@ -1,15 +0,0 @@
-create or replace procedure do_commits() as $$
-declare
-    xid xid8;
-	i integer;
-begin
-    for i in 1..1000000 loop
-	    xid = txid_current();
-		commit;
-		if (pg_xact_status(xid) <> 'committed') then
-		   raise exception 'CLOG corruption';
-		end if;
-	end loop;
-end;
-$$ language plpgsql;
-call do_commits();
--- a/test_runner/zenith_regress/expected/zenith-rel-truncate.out
+++ b/test_runner/zenith_regress/expected/zenith-rel-truncate.out
@@ -1,19 +0,0 @@
--
-- Test that when a relation is truncated by VACUUM, the next smgrnblocks()
-- query to get the relation's size returns the new size.
-- (This isn't related to the TRUNCATE command, which works differently,
-- by creating a new relation file)
--
-CREATE TABLE truncatetest (i int);
-INSERT INTO truncatetest SELECT g FROM generate_series(1, 10000) g;
-- Remove all the rows, and run VACUUM to remove the dead tuples and
-- truncate the physical relation to 0 blocks.
-DELETE FROM truncatetest;
-VACUUM truncatetest;
-- Check that a SeqScan sees correct relation size (which is now 0)
-SELECT * FROM truncatetest;
- i 
---
-(0 rows)
-
-DROP TABLE truncatetest;
--- a/test_runner/zenith_regress/expected/zenith-vacuum-full.out
+++ b/test_runner/zenith_regress/expected/zenith-vacuum-full.out
@@ -1,304 +0,0 @@
-create table foo(a int primary key, b int, c int);
-insert into foo values (generate_series(1,10000), generate_series(1,10000), generate_series(1,10000));
-create index concurrently on foo(b);
-create index concurrently on foo(c);
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-vacuum full foo;
-\d foo
-                Table "public.foo"
- Column |  Type   | Collation | Nullable | Default 
--------+---------+-----------+----------+---------
- a      | integer |           | not null | 
- b      | integer |           |          | 
- c      | integer |           |          | 
-Indexes:
-    "foo_pkey" PRIMARY KEY, btree (a)
-    "foo_b_idx" btree (b)
-    "foo_c_idx" btree (c)
-
-drop table foo;
--- a/test_runner/zenith_regress/parallel_schedule
+++ b/test_runner/zenith_regress/parallel_schedule
@@ -1,11 +0,0 @@
-# ----------
-# src/test/regress/parallel_schedule
-#
-# By convention, we put no more than twenty tests in any one parallel group;
-# this limits the number of connections needed to run the tests.
-# ----------
-
-test: zenith-cid
-test: zenith-rel-truncate
-test: zenith-clog
-test: zenith-vacuum-full
--- a/test_runner/zenith_regress/serial_schedule
+++ b/test_runner/zenith_regress/serial_schedule
@@ -1,6 +0,0 @@
-# src/test/regress/serial_schedule
-# This should probably be in an order similar to parallel_schedule.
-test: zenith-cid
-test: zenith-rel-truncate
-test: zenith-clog
-test: zenith-vacuum-full
--- a/test_runner/zenith_regress/sql/.gitignore
+++ b/test_runner/zenith_regress/sql/.gitignore
@@ -1,8 +0,0 @@
-/constraints.sql
-/copy.sql
-/create_function_1.sql
-/create_function_2.sql
-/largeobject.sql
-/misc.sql
-/security_label.sql
-/tablespace.sql
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`pytest_plugins = ("fixtures.zenith_fixtures")`