Compare commits

..

2 Commits

Author            SHA1         Message                          Date
Bojan Serafimov   64fcf4f096   Implement mock console           2022-02-09 14:30:01 -05:00
Dmitry Ivanov     18d3d078ad   [WIP] [proxy] Migrate to async   2022-02-08 05:43:32 +03:00
126 changed files with 2895 additions and 4700 deletions

View File

@@ -54,8 +54,7 @@ jobs:
if [ ! -e tmp_install/bin/postgres ]; then
# "depth 1" saves some time by not cloning the whole repo
git submodule update --init --depth 1
# bail out on any warnings
COPT='-Werror' mold -run make postgres -j$(nproc)
mold -run make postgres -j$(nproc)
fi
- save_cache:
@@ -298,7 +297,6 @@ jobs:
- PLATFORM: zenith-local-ci
command: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/<< parameters.test_selection >>"
EXTRA_PARAMS="<< parameters.extra_params >>"
@@ -343,6 +341,7 @@ jobs:
if << parameters.save_perf_report >>; then
if [[ $CIRCLE_BRANCH == "main" ]]; then
# TODO: reuse scripts/git-upload
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO=local
scripts/generate_and_push_perf_report.sh
@@ -598,7 +597,6 @@ workflows:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
context: PERF_TEST_RESULT_CONNSTR
matrix:
parameters:
build_type: ["debug", "release"]
@@ -616,7 +614,6 @@ workflows:
- build-zenith-<< matrix.build_type >>
- run-pytest:
name: benchmarks
context: PERF_TEST_RESULT_CONNSTR
build_type: release
test_selection: performance
run_in_parallel: false

View File

@@ -3,7 +3,7 @@ name: benchmarking
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ your branch ]
# branches: [ mybranch ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
@@ -41,7 +41,7 @@ jobs:
run: |
python3 -m pip install --upgrade poetry wheel
# since pip/poetry caches are reused there shouldn't be any troubles with install every time
./scripts/pysync
poetry install
- name: Show versions
run: |
@@ -89,15 +89,11 @@ jobs:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# just to be sure that no data was cached on self hosted runner
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh

Cargo.lock (generated, 616 changed lines)

File diff suppressed because it is too large

View File

@@ -1,14 +1,17 @@
[package]
name = "compute_tools"
version = "0.1.0"
authors = ["Alexey Kondratov <kondratov.aleksey@gmail.com>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
libc = "0.2"
anyhow = "1.0"
chrono = "0.4"
clap = "3.0"
env_logger = "0.9"
clap = "2.33"
env_logger = "0.8"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }

View File

@@ -34,7 +34,6 @@ use std::sync::{Arc, RwLock};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use log::info;
use postgres::{Client, NoTls};
@@ -163,34 +162,34 @@ fn main() -> Result<()> {
let matches = clap::App::new("zenith_ctl")
.version(version.unwrap_or("unknown"))
.arg(
Arg::new("connstr")
.short('C')
clap::Arg::with_name("connstr")
.short("C")
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
clap::Arg::with_name("pgdata")
.short("D")
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
clap::Arg::with_name("pgbin")
.short("b")
.long("pgbin")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
clap::Arg::with_name("spec")
.short("s")
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
clap::Arg::with_name("spec-path")
.short("S")
.long("spec-path")
.value_name("SPEC_PATH"),
)
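
The hunk above moves zenith_ctl's argument definitions from the clap 3 builder style (Arg::new, char shorts) back to the clap 2.33 API (Arg::with_name, string shorts), matching the Cargo.toml change earlier in this diff. A minimal, self-contained sketch of the clap 2.33 pattern the code settles on; the two arguments shown are illustrative, not the full set:

use clap::{App, Arg};

fn main() {
    // clap 2.x: arguments are keyed with `with_name` and shorts are &str, not char;
    // `value_name` implicitly makes the argument take a value.
    let matches = App::new("zenith_ctl")
        .arg(
            Arg::with_name("connstr")
                .short("C")
                .long("connstr")
                .value_name("DATABASE_URL")
                .required(true),
        )
        .arg(
            Arg::with_name("pgdata")
                .short("D")
                .long("pgdata")
                .value_name("DATADIR")
                .required(true),
        )
        .get_matches();

    // Values come back as Option<&str>; `required(true)` makes unwrap safe here.
    let connstr = matches.value_of("connstr").unwrap();
    let pgdata = matches.value_of("pgdata").unwrap();
    println!("connstr={} pgdata={}", connstr, pgdata);
}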

View File

@@ -1,8 +1,11 @@
[package]
name = "control_plane"
version = "0.1.0"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }

View File

@@ -1,20 +1,20 @@
# Page server and three safekeepers.
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_pg_addr = 'localhost:64000'
listen_http_addr = 'localhost:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
name = 'sk1'
pg_port = 5454
http_port = 7676
[[safekeepers]]
id = 2
name = 'sk2'
pg_port = 5455
http_port = 7677
[[safekeepers]]
id = 3
name = 'sk3'
pg_port = 5456
http_port = 7678

View File

@@ -1,11 +1,11 @@
# Minimal zenith environment with one safekeeper. This is equivalent to the built-in
# defaults that you get with no --config
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_pg_addr = 'localhost:64000'
listen_http_addr = 'localhost:9898'
auth_type = 'Trust'
[[safekeepers]]
id = 1
name = 'single'
pg_port = 5454
http_port = 7676

View File

@@ -334,26 +334,14 @@ impl PostgresNode {
if let Some(lsn) = self.lsn {
conf.append("recovery_target_lsn", &lsn.to_string());
}
conf.append_line("");
// Configure backpressure
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
// Actually latency should be much smaller (better if < 1sec). But we assume that recently
// updated pages are not requested from pageserver.
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {
// Configure backpressure
// In setup with safekeepers apply_lag depends on
// speed of data checkpointing on pageserver (see disk_consistent_lsn).
conf.append("max_replication_apply_lag", "1500MB");
// Configure the node to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
@@ -366,6 +354,11 @@ impl PostgresNode {
.join(",");
conf.append("wal_acceptors", &wal_acceptors);
} else {
// Configure backpressure
// In setup without safekeepers, flush_lag depends on
// speed of data checkpointing on pageserver (see disk_consistent_lsn)
conf.append("max_replication_flush_lag", "1500MB");
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,
// so set more relaxed synchronous_commit.
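
The hunk above replaces a global write/flush lag configuration (and the long rationale about get_page_at_lsn latency, WAL retention on safekeepers and S3 upload lag) with one backpressure limit per deployment shape. A condensed sketch of the branch-dependent variant this compare lands on, with a plain Vec standing in for the postgresql.conf builder used by PostgresNode (an assumption for illustration):

fn configure_backpressure(conf: &mut Vec<(String, String)>, have_safekeepers: bool) {
    let mut append = |k: &str, v: &str| conf.push((k.to_string(), v.to_string()));
    if have_safekeepers {
        // apply lag bounds how far the pageserver's disk_consistent_lsn may
        // trail the compute before writers are throttled
        append("max_replication_apply_lag", "1500MB");
        // plus the safekeeper connection settings shown above
        append("synchronous_standby_names", "walproposer");
    } else {
        // test-only setup without safekeepers: bound the flush lag instead,
        // i.e. the speed of checkpointing on the pageserver (disk_consistent_lsn)
        append("max_replication_flush_lag", "1500MB");
    }
}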

View File

@@ -12,9 +12,7 @@ use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{opt_display_serde, ZNodeId, ZTenantId};
use crate::safekeeper::SafekeeperNode;
use zenith_utils::zid::{opt_display_serde, ZTenantId};
//
// This data structure represents the zenith CLI config
@@ -64,8 +62,6 @@ pub struct LocalEnv {
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(default)]
pub struct PageServerConf {
// node id
pub id: ZNodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
@@ -80,7 +76,6 @@ pub struct PageServerConf {
impl Default for PageServerConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
auth_type: AuthType::Trust,
@@ -92,7 +87,7 @@ impl Default for PageServerConf {
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
pub id: ZNodeId,
pub name: String,
pub pg_port: u16,
pub http_port: u16,
pub sync: bool,
@@ -101,7 +96,7 @@ pub struct SafekeeperConf {
impl Default for SafekeeperConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
name: String::new(),
pg_port: 0,
http_port: 0,
sync: true,
@@ -141,8 +136,8 @@ impl LocalEnv {
self.base_data_dir.clone()
}
pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
self.base_data_dir.join("safekeepers").join(data_dir_name)
pub fn safekeeper_data_dir(&self, node_name: &str) -> PathBuf {
self.base_data_dir.join("safekeepers").join(node_name)
}
/// Create a LocalEnv from a config file.
@@ -290,7 +285,7 @@ impl LocalEnv {
fs::create_dir_all(self.pg_data_dirs_path())?;
for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
fs::create_dir_all(self.safekeeper_data_dir(&safekeeper.name))?;
}
let mut conf_content = String::new();
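
After this change a safekeeper's data directory is keyed by its name rather than a numeric node id. A small sketch of the resulting path lookup; the base directory is illustrative:

use std::path::{Path, PathBuf};

// Mirrors LocalEnv::safekeeper_data_dir from the hunk above.
fn safekeeper_data_dir(base_data_dir: &Path, node_name: &str) -> PathBuf {
    base_data_dir.join("safekeepers").join(node_name)
}

fn main() {
    // 'sk1' matches the names in the three-safekeeper config earlier in this diff.
    let base = Path::new("/tmp/zenith_repo");
    assert_eq!(
        safekeeper_data_dir(base, "sk1"),
        PathBuf::from("/tmp/zenith_repo/safekeepers/sk1")
    );
}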

View File

@@ -15,7 +15,6 @@ use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::zid::ZNodeId;
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
@@ -62,7 +61,7 @@ impl ResponseErrorMessageExt for Response {
//
#[derive(Debug)]
pub struct SafekeeperNode {
pub id: ZNodeId,
pub name: String,
pub conf: SafekeeperConf,
@@ -78,15 +77,15 @@ impl SafekeeperNode {
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
let pageserver = Arc::new(PageServerNode::from_env(env));
println!("initializing for sk {} for {}", conf.id, conf.http_port);
println!("initializing for {} for {}", conf.name, conf.http_port);
SafekeeperNode {
id: conf.id,
name: conf.name.clone(),
conf: conf.clone(),
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
http_base_url: format!("http://localhost:{}/v1", conf.http_port),
pageserver,
}
}
@@ -94,17 +93,13 @@ impl SafekeeperNode {
/// Construct libpq connection string for connecting to this safekeeper.
fn safekeeper_connection_config(port: u16) -> Config {
// TODO safekeeper authentication not implemented yet
format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
format!("postgresql://no_user@localhost:{}/no_db", port)
.parse()
.unwrap()
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
}
pub fn datadir_path(&self) -> PathBuf {
SafekeeperNode::datadir_path_by_id(&self.env, self.id)
self.env.safekeeper_data_dir(&self.name)
}
pub fn pid_file(&self) -> PathBuf {
@@ -119,13 +114,12 @@ impl SafekeeperNode {
);
io::stdout().flush().unwrap();
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
let listen_pg = format!("localhost:{}", self.conf.pg_port);
let listen_http = format!("localhost:{}", self.conf.http_port);
let mut cmd = Command::new(self.env.safekeeper_bin()?);
fill_rust_env_vars(
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
.args(&["--id", self.id.to_string().as_ref()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.args(&["--recall", "1 second"])
@@ -189,7 +183,7 @@ impl SafekeeperNode {
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
let pid_file = self.pid_file();
if !pid_file.exists() {
println!("Safekeeper {} is already stopped", self.id);
println!("Safekeeper {} is already stopped", self.name);
return Ok(());
}
let pid = read_pidfile(&pid_file)?;

View File

@@ -103,8 +103,6 @@ impl PageServerNode {
) -> anyhow::Result<()> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotes etc.
let base_data_dir_param = self.env.base_data_dir.display().to_string();
let pg_distrib_dir_param =
@@ -124,7 +122,6 @@ impl PageServerNode {
args.extend(["-c", &authg_type_param]);
args.extend(["-c", &listen_http_addr_param]);
args.extend(["-c", &listen_pg_addr_param]);
args.extend(["-c", &id]);
for config_override in config_overrides {
args.extend(["-c", config_override]);

View File

@@ -4,7 +4,7 @@ set -eux
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'"
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /data

View File

@@ -1,6 +1,7 @@
[package]
name = "pageserver"
version = "0.1.0"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2021"
[dependencies]
@@ -14,7 +15,7 @@ futures = "0.3.13"
hyper = "0.14"
lazy_static = "1.4.0"
log = "0.4.14"
clap = "3.0"
clap = "2.33.0"
daemonize = "0.4.1"
tokio = { version = "1.11", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
@@ -22,6 +23,7 @@ postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-stream = "0.1.8"
routerify = "2"
anyhow = { version = "1.0", features = ["backtrace"] }
crc32c = "0.6.0"
thiserror = "1.0"
@@ -30,7 +32,7 @@ tar = "0.4.33"
humantime = "2.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
toml_edit = { version = "0.13", features = ["easy"] }
toml_edit = { version = "0.12", features = ["easy"] }
scopeguard = "1.1.0"
async-trait = "0.1"
const_format = "0.2.21"
@@ -40,8 +42,8 @@ signal-hook = "0.3.10"
url = "2"
nix = "0.23"
once_cell = "1.8.0"
parking_lot = "0.11.2"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
async-compression = {version = "0.3", features = ["zstd", "tokio"]}

View File

@@ -13,7 +13,7 @@ fn main() -> Result<()> {
.about("Dump contents of one layer file, for debugging")
.version(GIT_VERSION)
.arg(
Arg::new("path")
Arg::with_name("path")
.help("Path to file to dump")
.required(true)
.index(1),

View File

@@ -27,27 +27,27 @@ fn main() -> Result<()> {
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(GIT_VERSION)
.arg(
Arg::new("daemonize")
.short('d')
Arg::with_name("daemonize")
.short("d")
.long("daemonize")
.takes_value(false)
.help("Run in the background"),
)
.arg(
Arg::new("init")
Arg::with_name("init")
.long("init")
.takes_value(false)
.help("Initialize pageserver repo"),
)
.arg(
Arg::new("workdir")
.short('D')
Arg::with_name("workdir")
.short("D")
.long("workdir")
.takes_value(true)
.help("Working directory for the pageserver"),
)
.arg(
Arg::new("create-tenant")
Arg::with_name("create-tenant")
.long("create-tenant")
.takes_value(true)
.help("Create tenant during init")
@@ -55,13 +55,13 @@ fn main() -> Result<()> {
)
// See `settings.md` for more details on the extra configuration parameters pageserver can process
.arg(
Arg::new("config-override")
.short('c')
Arg::with_name("config-override")
.short("c")
.takes_value(true)
.number_of_values(1)
.multiple_occurrences(true)
.multiple(true)
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
Any option has to be a valid toml document, example: `-c \"foo='hey'\"` `-c \"foo={value=1}\"`"),
)
.get_matches();
@@ -115,14 +115,7 @@ fn main() -> Result<()> {
option_line
)
})?;
for (key, item) in doc.iter() {
if key == "id" {
anyhow::ensure!(
init,
"node id can only be set during pageserver init and cannot be overridden"
);
}
toml.insert(key, item.clone());
}
}
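
The loop above folds each -c override into the main TOML document; with the removed guard gone, the 'id' key is no longer special-cased here. A minimal sketch of the merge, reusing the calls visible in the hunk (parsing into a toml_edit::Document, iter, insert); the override string is illustrative, and calling insert directly on the Document is assumed to behave as it does in the code above:

use toml_edit::Document;

fn main() -> anyhow::Result<()> {
    // Base config, e.g. what was read from pageserver.toml.
    let mut toml: Document = "listen_pg_addr = '127.0.0.1:64000'".parse()?;

    // One `-c` override; any valid TOML document is accepted.
    let option_line = "checkpoint_distance = 111";
    let doc: Document = option_line.parse()?;

    // Same merge as in the hunk: copy every top-level key over, last writer wins.
    for (key, item) in doc.iter() {
        toml.insert(key, item.clone());
    }

    println!("{}", toml);
    Ok(())
}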

View File

@@ -1,334 +0,0 @@
//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
//! See [`compression`] for more details about the archives.
use std::{collections::BTreeSet, path::Path};
use anyhow::{bail, ensure, Context};
use clap::{App, Arg};
use pageserver::{
layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
remote_storage::compression,
};
use tokio::{fs, io};
use zenith_utils::GIT_VERSION;
const LIST_SUBCOMMAND: &str = "list";
const ARCHIVE_ARG_NAME: &str = "archive";
const EXTRACT_SUBCOMMAND: &str = "extract";
const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
const CREATE_SUBCOMMAND: &str = "create";
const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
let arg_matches = App::new("pageserver zst blob [un]compressor utility")
.version(GIT_VERSION)
.subcommands(vec![
App::new(LIST_SUBCOMMAND)
.about("List the archive contents")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to list the contents of"),
),
App::new(EXTRACT_SUBCOMMAND)
.about("Extracts the archive into the directory")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to extract"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
),
App::new(CREATE_SUBCOMMAND)
.about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
.arg(
Arg::new(SOURCE_DIRECTORY_ARG_NAME)
.required(true)
.takes_value(true)
.help("A directory to use for creating the archive"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to create the archive in. Optional, will use the current directory if not specified"),
),
])
.get_matches();
let subcommand_name = match arg_matches.subcommand_name() {
Some(name) => name,
None => bail!("No subcommand specified"),
};
let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
Some(matches) => matches,
None => bail!(
"No subcommand arguments were recognized for subcommand '{}'",
subcommand_name
),
};
let target_dir = Path::new(
subcommand_matches
.value_of(TARGET_DIRECTORY_ARG_NAME)
.unwrap_or("./"),
);
match subcommand_name {
LIST_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
list_archive(archive).await
}
EXTRACT_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
extract_archive(archive, target_dir).await
}
CREATE_SUBCOMMAND => {
let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
Some(source) => Path::new(source),
None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
};
create_archive(source_dir, target_dir).await
}
unknown => bail!("Unknown subcommand {}", unknown),
}
}
async fn list_archive(archive: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
println!("Listing an archive at path '{}'", archive.display());
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
let archive_bytes = fs::read(&archive)
.await
.context("Failed to read the archive bytes")?;
let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
.await
.context("Failed to read the archive header")?;
let empty_path = Path::new("");
println!("-------------------------------");
let longest_path_in_archive = header
.files
.iter()
.filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
.max()
.unwrap_or_default()
.max(METADATA_FILE_NAME.len());
for regular_file in &header.files {
println!(
"File: {:width$} uncompressed size: {} bytes",
regular_file.subpath.as_path(empty_path).display(),
regular_file.size,
width = longest_path_in_archive,
)
}
println!(
"File: {:width$} uncompressed size: {} bytes",
METADATA_FILE_NAME,
header.metadata_file_size,
width = longest_path_in_archive,
);
println!("-------------------------------");
Ok(())
}
async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
let mut dir_contents = fs::read_dir(&target_dir)
.await
.context("Failed to list the target directory contents")?;
let dir_entry = dir_contents
.next_entry()
.await
.context("Failed to list the target directory contents")?;
ensure!(
dir_entry.is_none(),
"Target directory '{}' is not empty",
target_dir.display()
);
println!(
"Extracting an archive at path '{}' into directory '{}'",
archive.display(),
target_dir.display()
);
let mut archive_file = fs::File::open(&archive).await.with_context(|| {
format!(
"Failed to get the archive name from the path '{}'",
archive.display()
)
})?;
let header = compression::read_archive_header(archive_name, &mut archive_file)
.await
.context("Failed to read the archive header")?;
compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
.await
.context("Failed to extract the archive")
}
async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
let source_dir = source_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the source dir path '{}'",
source_dir.display()
)
})?;
ensure!(
source_dir.is_dir(),
"Path '{}' is not a directory",
source_dir.display()
);
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
println!(
"Compressing directory '{}' and creating resulting archive in directory '{}'",
source_dir.display(),
target_dir.display()
);
let mut metadata_file_contents = None;
let mut files_co_archive = Vec::new();
let mut source_dir_contents = fs::read_dir(&source_dir)
.await
.context("Failed to read the source directory contents")?;
while let Some(source_dir_entry) = source_dir_contents
.next_entry()
.await
.context("Failed to read a source dir entry")?
{
let entry_path = source_dir_entry.path();
if entry_path.is_file() {
if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
let metadata_bytes = fs::read(entry_path)
.await
.context("Failed to read metata file bytes in the source dir")?;
metadata_file_contents = Some(
TimelineMetadata::from_bytes(&metadata_bytes)
.context("Failed to parse metata file contents in the source dir")?,
);
} else {
files_co_archive.push(entry_path);
}
}
}
let metadata = match metadata_file_contents {
Some(metadata) => metadata,
None => bail!(
"No metadata file found in the source dir '{}', cannot create the archive",
source_dir.display()
),
};
let _ = compression::archive_files_as_stream(
&source_dir,
files_co_archive.iter(),
&metadata,
move |mut archive_streamer, archive_name| async move {
let archive_target = target_dir.join(&archive_name);
let mut archive_file = fs::File::create(&archive_target).await?;
io::copy(&mut archive_streamer, &mut archive_file).await?;
Ok(archive_target)
},
)
.await
.context("Failed to create an archive")?;
Ok(())
}

View File

@@ -14,20 +14,20 @@ fn main() -> Result<()> {
.about("Dump or update metadata file")
.version(GIT_VERSION)
.arg(
Arg::new("path")
Arg::with_name("path")
.help("Path to metadata file")
.required(true),
)
.arg(
Arg::new("disk_lsn")
.short('d')
Arg::with_name("disk_lsn")
.short("d")
.long("disk_lsn")
.takes_value(true)
.help("Replace disk constistent lsn"),
)
.arg(
Arg::new("prev_lsn")
.short('p')
Arg::with_name("prev_lsn")
.short("p")
.long("prev_lsn")
.takes_value(true)
.help("Previous record LSN"),

View File

@@ -8,7 +8,7 @@ use anyhow::{bail, ensure, Context, Result};
use toml_edit;
use toml_edit::{Document, Item};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use std::convert::TryInto;
use std::env;
@@ -72,10 +72,6 @@ pub mod defaults {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageServerConf {
// Identifier of that particular pageserver so e.g. safekeepers
// can safely distinguish different pageservers
pub id: ZNodeId,
/// Example (default): 127.0.0.1:64000
pub listen_pg_addr: String,
/// Example (default): 127.0.0.1:9898
@@ -110,184 +106,6 @@ pub struct PageServerConf {
pub remote_storage_config: Option<RemoteStorageConfig>,
}
// use dedicated enum for builder to better indicate the intention
// and avoid possible confusion with nested options
pub enum BuilderValue<T> {
Set(T),
NotSet,
}
impl<T> BuilderValue<T> {
pub fn ok_or<E>(self, err: E) -> Result<T, E> {
match self {
Self::Set(v) => Ok(v),
Self::NotSet => Err(err),
}
}
}
// needed to simplify config construction
struct PageServerConfigBuilder {
listen_pg_addr: BuilderValue<String>,
listen_http_addr: BuilderValue<String>,
checkpoint_distance: BuilderValue<u64>,
checkpoint_period: BuilderValue<Duration>,
gc_horizon: BuilderValue<u64>,
gc_period: BuilderValue<Duration>,
superuser: BuilderValue<String>,
page_cache_size: BuilderValue<usize>,
max_file_descriptors: BuilderValue<usize>,
workdir: BuilderValue<PathBuf>,
pg_distrib_dir: BuilderValue<PathBuf>,
auth_type: BuilderValue<AuthType>,
//
auth_validation_public_key_path: BuilderValue<Option<PathBuf>>,
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
id: BuilderValue<ZNodeId>,
}
impl Default for PageServerConfigBuilder {
fn default() -> Self {
use self::BuilderValue::*;
use defaults::*;
Self {
listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
checkpoint_period: Set(humantime::parse_duration(DEFAULT_CHECKPOINT_PERIOD)
.expect("cannot parse default checkpoint period")),
gc_horizon: Set(DEFAULT_GC_HORIZON),
gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
.expect("cannot parse default gc period")),
superuser: Set(DEFAULT_SUPERUSER.to_string()),
page_cache_size: Set(DEFAULT_PAGE_CACHE_SIZE),
max_file_descriptors: Set(DEFAULT_MAX_FILE_DESCRIPTORS),
workdir: Set(PathBuf::new()),
pg_distrib_dir: Set(env::current_dir()
.expect("cannot access current directory")
.join("tmp_install")),
auth_type: Set(AuthType::Trust),
auth_validation_public_key_path: Set(None),
remote_storage_config: Set(None),
id: NotSet,
}
}
}
impl PageServerConfigBuilder {
pub fn listen_pg_addr(&mut self, listen_pg_addr: String) {
self.listen_pg_addr = BuilderValue::Set(listen_pg_addr)
}
pub fn listen_http_addr(&mut self, listen_http_addr: String) {
self.listen_http_addr = BuilderValue::Set(listen_http_addr)
}
pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
}
pub fn checkpoint_period(&mut self, checkpoint_period: Duration) {
self.checkpoint_period = BuilderValue::Set(checkpoint_period)
}
pub fn gc_horizon(&mut self, gc_horizon: u64) {
self.gc_horizon = BuilderValue::Set(gc_horizon)
}
pub fn gc_period(&mut self, gc_period: Duration) {
self.gc_period = BuilderValue::Set(gc_period)
}
pub fn superuser(&mut self, superuser: String) {
self.superuser = BuilderValue::Set(superuser)
}
pub fn page_cache_size(&mut self, page_cache_size: usize) {
self.page_cache_size = BuilderValue::Set(page_cache_size)
}
pub fn max_file_descriptors(&mut self, max_file_descriptors: usize) {
self.max_file_descriptors = BuilderValue::Set(max_file_descriptors)
}
pub fn workdir(&mut self, workdir: PathBuf) {
self.workdir = BuilderValue::Set(workdir)
}
pub fn pg_distrib_dir(&mut self, pg_distrib_dir: PathBuf) {
self.pg_distrib_dir = BuilderValue::Set(pg_distrib_dir)
}
pub fn auth_type(&mut self, auth_type: AuthType) {
self.auth_type = BuilderValue::Set(auth_type)
}
pub fn auth_validation_public_key_path(
&mut self,
auth_validation_public_key_path: Option<PathBuf>,
) {
self.auth_validation_public_key_path = BuilderValue::Set(auth_validation_public_key_path)
}
pub fn remote_storage_config(&mut self, remote_storage_config: Option<RemoteStorageConfig>) {
self.remote_storage_config = BuilderValue::Set(remote_storage_config)
}
pub fn id(&mut self, node_id: ZNodeId) {
self.id = BuilderValue::Set(node_id)
}
pub fn build(self) -> Result<PageServerConf> {
Ok(PageServerConf {
listen_pg_addr: self
.listen_pg_addr
.ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
listen_http_addr: self
.listen_http_addr
.ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
checkpoint_distance: self
.checkpoint_distance
.ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
checkpoint_period: self
.checkpoint_period
.ok_or(anyhow::anyhow!("missing checkpoint_period"))?,
gc_horizon: self
.gc_horizon
.ok_or(anyhow::anyhow!("missing gc_horizon"))?,
gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
page_cache_size: self
.page_cache_size
.ok_or(anyhow::anyhow!("missing page_cache_size"))?,
max_file_descriptors: self
.max_file_descriptors
.ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
pg_distrib_dir: self
.pg_distrib_dir
.ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
auth_validation_public_key_path: self
.auth_validation_public_key_path
.ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
remote_storage_config: self
.remote_storage_config
.ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
})
}
}
/// External backup storage configuration, enough for creating a client for that storage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteStorageConfig {
@@ -403,39 +221,57 @@ impl PageServerConf {
///
/// This leaves any options not present in the file in the built-in defaults.
pub fn parse_and_validate(toml: &Document, workdir: &Path) -> Result<Self> {
let mut builder = PageServerConfigBuilder::default();
builder.workdir(workdir.to_owned());
use defaults::*;
let mut conf = PageServerConf {
workdir: workdir.to_path_buf(),
listen_pg_addr: DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: DEFAULT_HTTP_LISTEN_ADDR.to_string(),
checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_period: humantime::parse_duration(DEFAULT_CHECKPOINT_PERIOD)?,
gc_horizon: DEFAULT_GC_HORIZON,
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)?,
page_cache_size: DEFAULT_PAGE_CACHE_SIZE,
max_file_descriptors: DEFAULT_MAX_FILE_DESCRIPTORS,
pg_distrib_dir: PathBuf::new(),
auth_validation_public_key_path: None,
auth_type: AuthType::Trust,
remote_storage_config: None,
superuser: DEFAULT_SUPERUSER.to_string(),
};
for (key, item) in toml.iter() {
match key {
"listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
"listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
"checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
"checkpoint_period" => builder.checkpoint_period(parse_toml_duration(key, item)?),
"gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
"gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
"initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
"page_cache_size" => builder.page_cache_size(parse_toml_u64(key, item)? as usize),
"listen_pg_addr" => conf.listen_pg_addr = parse_toml_string(key, item)?,
"listen_http_addr" => conf.listen_http_addr = parse_toml_string(key, item)?,
"checkpoint_distance" => conf.checkpoint_distance = parse_toml_u64(key, item)?,
"checkpoint_period" => conf.checkpoint_period = parse_toml_duration(key, item)?,
"gc_horizon" => conf.gc_horizon = parse_toml_u64(key, item)?,
"gc_period" => conf.gc_period = parse_toml_duration(key, item)?,
"initial_superuser_name" => conf.superuser = parse_toml_string(key, item)?,
"page_cache_size" => conf.page_cache_size = parse_toml_u64(key, item)? as usize,
"max_file_descriptors" => {
builder.max_file_descriptors(parse_toml_u64(key, item)? as usize)
conf.max_file_descriptors = parse_toml_u64(key, item)? as usize
}
"pg_distrib_dir" => {
builder.pg_distrib_dir(PathBuf::from(parse_toml_string(key, item)?))
conf.pg_distrib_dir = PathBuf::from(parse_toml_string(key, item)?)
}
"auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
PathBuf::from(parse_toml_string(key, item)?),
)),
"auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
"auth_validation_public_key_path" => {
conf.auth_validation_public_key_path =
Some(PathBuf::from(parse_toml_string(key, item)?))
}
"auth_type" => conf.auth_type = parse_toml_auth_type(key, item)?,
"remote_storage" => {
builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
conf.remote_storage_config = Some(Self::parse_remote_storage_config(item)?)
}
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
_ => bail!("unrecognized pageserver option '{}'", key),
}
}
let mut conf = builder.build().context("invalid config")?;
if conf.auth_type == AuthType::ZenithJWT {
let auth_validation_public_key_path = conf
.auth_validation_public_key_path
@@ -449,6 +285,9 @@ impl PageServerConf {
);
}
if conf.pg_distrib_dir == PathBuf::new() {
conf.pg_distrib_dir = env::current_dir()?.join("tmp_install")
};
if !conf.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
@@ -543,7 +382,6 @@ impl PageServerConf {
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
PageServerConf {
id: ZNodeId(0),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_period: Duration::from_secs(10),
gc_horizon: defaults::DEFAULT_GC_HORIZON,
@@ -623,16 +461,15 @@ max_file_descriptors = 333
# initial superuser role name to use when creating a new tenant
initial_superuser_name = 'zzzz'
id = 10
"#;
"#;
#[test]
fn parse_defaults() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
// we have to create dummy paths to overcome the validation errors
let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
let config_string = format!("pg_distrib_dir='{}'", pg_distrib_dir.display());
let toml = config_string.parse()?;
let parsed_config =
@@ -643,7 +480,6 @@ id = 10
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
@@ -685,7 +521,6 @@ id = 10
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
listen_pg_addr: "127.0.0.1:64000".to_string(),
listen_http_addr: "127.0.0.1:9898".to_string(),
checkpoint_distance: 111,
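
With the builder gone, parse_and_validate now mutates a defaults-initialized PageServerConf directly, and 'id' is no longer an accepted key. A usage sketch mirroring the test in this hunk; PageServerConf is the struct from this file, and pg_distrib_dir is assumed to contain bin/postgres so validation passes:

use std::path::Path;
use toml_edit::Document;

fn load_conf(workdir: &Path, pg_distrib_dir: &Path) -> anyhow::Result<PageServerConf> {
    // Only the keys we want to override; everything else keeps the built-in defaults.
    let config_string = format!(
        "pg_distrib_dir='{}'\ncheckpoint_distance = 111",
        pg_distrib_dir.display()
    );
    let toml: Document = config_string.parse()?;
    // Keys outside the match above (including the removed 'id') are rejected
    // as unrecognized pageserver options.
    PageServerConf::parse_and_validate(&toml, workdir)
}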

View File

@@ -1,7 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ZTenantId;
use zenith_utils::zid::ZNodeId;
#[derive(Serialize, Deserialize)]
pub struct BranchCreateRequest {
@@ -16,8 +15,3 @@ pub struct TenantCreateRequest {
#[serde(with = "hex")]
pub tenant_id: ZTenantId,
}
#[derive(Serialize)]
pub struct StatusResponse {
pub id: ZNodeId,
}

View File

@@ -17,11 +17,6 @@ paths:
application/json:
schema:
type: object
required:
- id
properties:
id:
type: integer
/v1/timeline/{tenant_id}:
parameters:
- name: tenant_id
@@ -239,7 +234,9 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/BranchInfo"
type: array
items:
$ref: "#/components/schemas/BranchInfo"
"400":
description: Malformed branch create request
content:
@@ -373,15 +370,12 @@ components:
format: hex
ancestor_id:
type: string
format: hex
ancestor_lsn:
type: string
current_logical_size:
type: integer
current_logical_size_non_incremental:
type: integer
latest_valid_lsn:
type: integer
TimelineInfo:
type: object
required:

View File

@@ -1,8 +1,10 @@
use std::sync::Arc;
use anyhow::{Context, Result};
use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use routerify::{ext::RequestExt, RouterBuilder};
use serde::Serialize;
use tracing::*;
use zenith_utils::auth::JwtAuth;
@@ -17,12 +19,10 @@ use zenith_utils::http::{
request::get_request_param,
request::parse_request_param,
};
use zenith_utils::http::{RequestExt, RouterBuilder};
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::{opt_display_serde, ZTimelineId};
use super::models::BranchCreateRequest;
use super::models::StatusResponse;
use super::models::TenantCreateRequest;
use crate::branches::BranchInfo;
use crate::repository::RepositoryTimeline;
@@ -64,12 +64,12 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
}
// healthcheck handler
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let config = get_config(&request);
Ok(json_response(
StatusCode::OK,
StatusResponse { id: config.id },
)?)
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from("{}"))
.map_err(ApiError::from_err)?)
}
async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {

View File

@@ -175,10 +175,7 @@ impl Write for EphemeralFile {
}
fn flush(&mut self) -> Result<(), std::io::Error> {
// we don't need to flush data:
// * we either write input bytes or not, not keeping any intermediate data buffered
// * rust unix file `flush` impl does not flush things either, returning `Ok(())`
Ok(())
todo!()
}
}

View File

@@ -27,10 +27,13 @@ use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::is_socket_read_timed_out;
use zenith_utils::postgres_backend::PostgresBackend;
use zenith_utils::postgres_backend::{self, AuthType};
use zenith_utils::pq_proto::{BeMessage, FeMessage, RowDescriptor, SINGLE_COL_ROWDESC};
use zenith_utils::pq_proto::{
BeMessage, FeMessage, RowDescriptor, HELLO_WORLD_ROW, SINGLE_COL_ROWDESC,
};
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use crate::basebackup;
use crate::branches;
use crate::config::PageServerConf;
use crate::relish::*;
use crate::repository::Timeline;
@@ -659,21 +662,79 @@ impl postgres_backend::Handler for PageServerHandler {
walreceiver::launch_wal_receiver(self.conf, tenantid, timelineid, &connstr)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("branch_create ") {
let err = || format!("invalid branch_create: '{}'", query_string);
// branch_create <tenantid> <branchname> <startpoint>
// TODO lazy static
// TODO: escaping, to allow branch names with spaces
let re = Regex::new(r"^branch_create ([[:xdigit:]]+) (\S+) ([^\r\n\s;]+)[\r\n\s;]*;?$")
.unwrap();
let caps = re.captures(query_string).with_context(err)?;
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
let branchname = caps.get(2).with_context(err)?.as_str().to_owned();
let startpoint_str = caps.get(3).with_context(err)?.as_str().to_owned();
self.check_permission(Some(tenantid))?;
let _enter =
info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
let branch =
branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
let branch = serde_json::to_vec(&branch)?;
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(&branch)]))?
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("branch_list ") {
// branch_list <zenith tenantid as hex string>
let re = Regex::new(r"^branch_list ([[:xdigit:]]+)$").unwrap();
let caps = re
.captures(query_string)
.with_context(|| format!("invalid branch_list: '{}'", query_string))?;
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
// since these handlers for tenant/branch commands are deprecated (in favor of http based ones)
// just use false in place of include non incremental logical size
let branches = crate::branches::get_branches(self.conf, &tenantid, false)?;
let branches_buf = serde_json::to_vec(&branches)?;
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(&branches_buf)]))?
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("tenant_list") {
let tenants = crate::tenant_mgr::list_tenants()?;
let tenants_buf = serde_json::to_vec(&tenants)?;
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(&tenants_buf)]))?
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("tenant_create") {
let err = || format!("invalid tenant_create: '{}'", query_string);
// tenant_create <tenantid>
let re = Regex::new(r"^tenant_create ([[:xdigit:]]+)$").unwrap();
let caps = re.captures(query_string).with_context(err)?;
self.check_permission(None)?;
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
tenant_mgr::create_repository_for_tenant(self.conf, tenantid)?;
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("status") {
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&HELLO_WORLD_ROW)?
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.to_ascii_lowercase().starts_with("set ") {
// important because psycopg2 executes "SET datestyle TO 'ISO'"
// on connect
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("failpoints ") {
let (_, failpoints) = query_string.split_at("failpoints ".len());
for failpoint in failpoints.split(';') {
if let Some((name, actions)) = failpoint.split_once('=') {
info!("cfg failpoint: {} {}", name, actions);
fail::cfg(name, actions).unwrap();
} else {
bail!("Invalid failpoints format");
}
}
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("do_gc ") {
// Run GC immediately on given timeline.
// FIXME: This is just for tests. See test_runner/batch_others/test_gc.py.
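
The handlers above accept branch_create, branch_list, tenant_create and status as plain query strings over the libpq protocol rather than SQL, replying with a single-column row (JSON payloads for the branch commands). A client-side sketch, assuming a reachable pageserver with auth_type = 'Trust' and using the synchronous postgres crate already in the dependency list; the connection string and tenant id are illustrative:

use postgres::{Client, NoTls, SimpleQueryMessage};

fn list_branches(connstr: &str, tenant_id_hex: &str) -> anyhow::Result<()> {
    let mut client = Client::connect(connstr, NoTls)?;
    // The pageserver treats the whole query string as a command, not SQL.
    let messages = client.simple_query(&format!("branch_list {}", tenant_id_hex))?;
    for message in messages {
        if let SimpleQueryMessage::Row(row) = message {
            // Single column containing a JSON array of BranchInfo objects.
            println!("{}", row.get(0).unwrap_or("<null>"));
        }
    }
    Ok(())
}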

View File

@@ -94,7 +94,7 @@ use std::{
use anyhow::{bail, Context};
use tokio::io;
use tracing::{error, info};
use zenith_utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
use zenith_utils::zid::{ZTenantId, ZTimelineId};
pub use self::storage_sync::{schedule_timeline_checkpoint_upload, schedule_timeline_download};
use self::{local_fs::LocalFs, rust_s3::S3};
@@ -104,7 +104,16 @@ use crate::{
repository::TimelineSyncState,
};
pub use storage_sync::compression;
/// Any timeline has its own id and its own tenant it belongs to,
/// the sync processes group timelines by both for simplicity.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TimelineSyncId(ZTenantId, ZTimelineId);
impl std::fmt::Display for TimelineSyncId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "(tenant: {}, timeline: {})", self.0, self.1)
}
}
/// A structure to combine all synchronization data to share with pageserver after a successful sync loop initialization.
/// Successful initialization includes a case when sync loop is not started, in which case the startup data is returned still,
@@ -158,7 +167,7 @@ pub fn start_local_timeline_sync(
ZTenantId,
HashMap<ZTimelineId, TimelineSyncState>,
> = HashMap::new();
for (ZTenantTimelineId{tenant_id, timeline_id}, (timeline_metadata, _)) in
for (TimelineSyncId(tenant_id, timeline_id), (timeline_metadata, _)) in
local_timeline_files
{
initial_timeline_states
@@ -178,7 +187,7 @@ pub fn start_local_timeline_sync(
fn local_tenant_timeline_files(
config: &'static PageServerConf,
) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
let mut local_tenant_timeline_files = HashMap::new();
let tenants_dir = config.tenants_path();
for tenants_dir_entry in fs::read_dir(&tenants_dir)
@@ -213,9 +222,8 @@ fn local_tenant_timeline_files(
fn collect_timelines_for_tenant(
config: &'static PageServerConf,
tenant_path: &Path,
) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
let mut timelines: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)> =
HashMap::new();
) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
let mut timelines: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)> = HashMap::new();
let tenant_id = tenant_path
.file_name()
.and_then(ffi::OsStr::to_str)
@@ -236,10 +244,7 @@ fn collect_timelines_for_tenant(
match collect_timeline_files(&timeline_path) {
Ok((timeline_id, metadata, timeline_files)) => {
timelines.insert(
ZTenantTimelineId {
tenant_id,
timeline_id,
},
TimelineSyncId(tenant_id, timeline_id),
(metadata, timeline_files),
);
}
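
TimelineSyncId, introduced above, is just the (tenant, timeline) pair the sync code keys its queues and index by, with a readable Display. A tiny sketch of how it is built and logged, assuming the code sits inside the remote_storage module (the tuple fields are crate-private):

use zenith_utils::zid::{ZTenantId, ZTimelineId};

// The sync code identifies a timeline by this (tenant, timeline) pair.
fn sync_id_for(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> TimelineSyncId {
    TimelineSyncId(tenant_id, timeline_id)
}

fn log_sync_id(id: &TimelineSyncId) {
    // Uses the Display impl above: "(tenant: <hex>, timeline: <hex>)".
    println!("scheduling sync for {}", id);
}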

View File

@@ -70,8 +70,7 @@
//!
//! When pageserver signals shutdown, the current sync task gets finished and the loop exits.
/// Expose the module for a binary CLI tool that deals with the corresponding blobs.
pub mod compression;
mod compression;
mod download;
pub mod index;
mod upload;
@@ -106,7 +105,7 @@ use self::{
},
upload::upload_timeline_checkpoint,
};
use super::{RemoteStorage, SyncStartupData, ZTenantTimelineId};
use super::{RemoteStorage, SyncStartupData, TimelineSyncId};
use crate::{
config::PageServerConf, layered_repository::metadata::TimelineMetadata,
remote_storage::storage_sync::compression::read_archive_header, repository::TimelineSyncState,
@@ -243,13 +242,13 @@ mod sync_queue {
/// Limited by the number of retries, after certain threshold the failing task gets evicted and the timeline disabled.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct SyncTask {
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
retries: u32,
kind: SyncKind,
}
impl SyncTask {
fn new(sync_id: ZTenantTimelineId, retries: u32, kind: SyncKind) -> Self {
fn new(sync_id: TimelineSyncId, retries: u32, kind: SyncKind) -> Self {
Self {
sync_id,
retries,
@@ -308,10 +307,7 @@ pub fn schedule_timeline_checkpoint_upload(
}
if !sync_queue::push(SyncTask::new(
ZTenantTimelineId {
tenant_id,
timeline_id,
},
TimelineSyncId(tenant_id, timeline_id),
0,
SyncKind::Upload(NewCheckpoint { layers, metadata }),
)) {
@@ -342,10 +338,7 @@ pub fn schedule_timeline_download(tenant_id: ZTenantId, timeline_id: ZTimelineId
tenant_id, timeline_id
);
sync_queue::push(SyncTask::new(
ZTenantTimelineId {
tenant_id,
timeline_id,
},
TimelineSyncId(tenant_id, timeline_id),
0,
SyncKind::Download(TimelineDownload {
files_to_skip: Arc::new(BTreeSet::new()),
@@ -361,7 +354,7 @@ pub(super) fn spawn_storage_sync_thread<
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
>(
conf: &'static PageServerConf,
local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
storage: S,
max_concurrent_sync: NonZeroUsize,
max_sync_errors: NonZeroU32,
@@ -517,7 +510,7 @@ async fn loop_step<
Err(e) => {
error!(
"Failed to process storage sync task for tenant {}, timeline {}: {:?}",
sync_id.tenant_id, sync_id.timeline_id, e
sync_id.0, sync_id.1, e
);
None
}
@@ -531,10 +524,7 @@ async fn loop_step<
while let Some((sync_id, state_update)) = task_batch.next().await {
debug!("Finished storage sync task for sync id {}", sync_id);
if let Some(state_update) = state_update {
let ZTenantTimelineId {
tenant_id,
timeline_id,
} = sync_id;
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
new_timeline_states
.entry(tenant_id)
.or_default()
@@ -628,7 +618,7 @@ async fn process_task<
fn schedule_first_sync_tasks(
index: &RemoteTimelineIndex,
local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> {
let mut initial_timeline_statuses: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> =
HashMap::new();
@@ -639,10 +629,7 @@ fn schedule_first_sync_tasks(
for (sync_id, (local_metadata, local_files)) in local_timeline_files {
let local_disk_consistent_lsn = local_metadata.disk_consistent_lsn();
let ZTenantTimelineId {
tenant_id,
timeline_id,
} = sync_id;
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
match index.timeline_entry(&sync_id) {
Some(index_entry) => {
let timeline_status = compare_local_and_remote_timeline(
@@ -685,10 +672,10 @@ fn schedule_first_sync_tasks(
}
}
let unprocessed_remote_ids = |remote_id: &ZTenantTimelineId| {
let unprocessed_remote_ids = |remote_id: &TimelineSyncId| {
initial_timeline_statuses
.get(&remote_id.tenant_id)
.and_then(|timelines| timelines.get(&remote_id.timeline_id))
.get(&remote_id.0)
.and_then(|timelines| timelines.get(&remote_id.1))
.is_none()
};
for unprocessed_remote_id in index
@@ -696,10 +683,7 @@ fn schedule_first_sync_tasks(
.filter(unprocessed_remote_ids)
.collect::<Vec<_>>()
{
let ZTenantTimelineId {
tenant_id: cloud_only_tenant_id,
timeline_id: cloud_only_timeline_id,
} = unprocessed_remote_id;
let TimelineSyncId(cloud_only_tenant_id, cloud_only_timeline_id) = unprocessed_remote_id;
match index
.timeline_entry(&unprocessed_remote_id)
.and_then(TimelineIndexEntry::disk_consistent_lsn)
@@ -728,7 +712,7 @@ fn schedule_first_sync_tasks(
fn compare_local_and_remote_timeline(
new_sync_tasks: &mut VecDeque<SyncTask>,
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
local_metadata: TimelineMetadata,
local_files: Vec<PathBuf>,
remote_entry: &TimelineIndexEntry,
@@ -785,7 +769,7 @@ async fn update_index_description<
>(
(storage, index): &(S, RwLock<RemoteTimelineIndex>),
timeline_dir: &Path,
id: ZTenantTimelineId,
id: TimelineSyncId,
) -> anyhow::Result<RemoteTimeline> {
let mut index_write = index.write().await;
let full_index = match index_write.timeline_entry(&id) {
@@ -808,7 +792,7 @@ async fn update_index_description<
Ok((archive_id, header_size, header)) => full_index.update_archive_contents(archive_id.0, header, header_size),
Err((e, archive_id)) => bail!(
"Failed to download archive header for tenant {}, timeline {}, archive for Lsn {}: {}",
id.tenant_id, id.timeline_id, archive_id.0,
id.0, id.1, archive_id.0,
e
),
}
@@ -886,7 +870,7 @@ mod test_utils {
timeline_id: ZTimelineId,
new_upload: NewCheckpoint,
) {
let sync_id = ZTenantTimelineId::new(harness.tenant_id, timeline_id);
let sync_id = TimelineSyncId(harness.tenant_id, timeline_id);
upload_timeline_checkpoint(
harness.conf,
Arc::clone(&remote_assets),
@@ -942,7 +926,7 @@ mod test_utils {
pub async fn expect_timeline(
index: &RwLock<RemoteTimelineIndex>,
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
) -> RemoteTimeline {
if let Some(TimelineIndexEntry::Full(remote_timeline)) =
index.read().await.timeline_entry(&sync_id)
@@ -977,18 +961,18 @@ mod test_utils {
let mut expected_timeline_entries = BTreeMap::new();
for sync_id in actual_sync_ids {
actual_branches.insert(
sync_id.tenant_id,
sync_id.1,
index_read
.branch_files(sync_id.tenant_id)
.branch_files(sync_id.0)
.into_iter()
.flat_map(|branch_paths| branch_paths.iter())
.cloned()
.collect::<BTreeSet<_>>(),
);
expected_branches.insert(
sync_id.tenant_id,
sync_id.1,
expected_index_with_descriptions
.branch_files(sync_id.tenant_id)
.branch_files(sync_id.0)
.into_iter()
.flat_map(|branch_paths| branch_paths.iter())
.cloned()

View File

@@ -248,7 +248,7 @@ fn archive_name(disk_consistent_lsn: Lsn, header_size: u64) -> String {
archive_name
}
pub async fn uncompress_with_header(
async fn uncompress_with_header(
files_to_skip: &BTreeSet<PathBuf>,
destination_dir: &Path,
header: ArchiveHeader,

View File

@@ -17,7 +17,7 @@ use crate::{
compression, index::TimelineIndexEntry, sync_queue, tenant_branch_files,
update_index_description, SyncKind, SyncTask,
},
RemoteStorage, ZTenantTimelineId,
RemoteStorage, TimelineSyncId,
},
};
@@ -52,16 +52,13 @@ pub(super) async fn download_timeline<
>(
conf: &'static PageServerConf,
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
mut download: TimelineDownload,
retries: u32,
) -> DownloadedTimeline {
debug!("Downloading layers for sync id {}", sync_id);
let ZTenantTimelineId {
tenant_id,
timeline_id,
} = sync_id;
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
let index_read = remote_assets.1.read().await;
let remote_timeline = match index_read.timeline_entry(&sync_id) {
None => {
@@ -113,8 +110,7 @@ pub(super) async fn download_timeline<
}
};
if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.tenant_id).await
{
if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.0).await {
error!(
"Failed to download missing branches for sync id {}: {:?}",
sync_id, e
@@ -184,10 +180,7 @@ async fn try_download_archive<
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
>(
conf: &'static PageServerConf,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
TimelineSyncId(tenant_id, timeline_id): TimelineSyncId,
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
remote_timeline: &RemoteTimeline,
archive_id: ArchiveId,
@@ -350,7 +343,7 @@ mod tests {
#[tokio::test]
async fn test_download_timeline() -> anyhow::Result<()> {
let repo_harness = RepoHarness::create("test_download_timeline")?;
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
repo_harness.conf,

View File

@@ -22,7 +22,7 @@ use crate::{
layered_repository::TIMELINES_SEGMENT_NAME,
remote_storage::{
storage_sync::compression::{parse_archive_name, FileEntry},
ZTenantTimelineId,
TimelineSyncId,
},
};
@@ -53,7 +53,7 @@ impl RelativePath {
#[derive(Debug, Clone)]
pub struct RemoteTimelineIndex {
branch_files: HashMap<ZTenantId, HashSet<RelativePath>>,
timeline_files: HashMap<ZTenantTimelineId, TimelineIndexEntry>,
timeline_files: HashMap<TimelineSyncId, TimelineIndexEntry>,
}
impl RemoteTimelineIndex {
@@ -80,22 +80,19 @@ impl RemoteTimelineIndex {
index
}
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&TimelineIndexEntry> {
pub fn timeline_entry(&self, id: &TimelineSyncId) -> Option<&TimelineIndexEntry> {
self.timeline_files.get(id)
}
pub fn timeline_entry_mut(
&mut self,
id: &ZTenantTimelineId,
) -> Option<&mut TimelineIndexEntry> {
pub fn timeline_entry_mut(&mut self, id: &TimelineSyncId) -> Option<&mut TimelineIndexEntry> {
self.timeline_files.get_mut(id)
}
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: TimelineIndexEntry) {
pub fn add_timeline_entry(&mut self, id: TimelineSyncId, entry: TimelineIndexEntry) {
self.timeline_files.insert(id, entry);
}
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
pub fn all_sync_ids(&self) -> impl Iterator<Item = TimelineSyncId> + '_ {
self.timeline_files.keys().copied()
}
@@ -351,10 +348,7 @@ fn try_parse_index_entry(
.to_string_lossy()
.to_string();
let sync_id = ZTenantTimelineId {
tenant_id,
timeline_id,
};
let sync_id = TimelineSyncId(tenant_id, timeline_id);
let timeline_index_entry = index
.timeline_files
.entry(sync_id)

View File

@@ -17,7 +17,7 @@ use crate::{
index::{RemoteTimeline, TimelineIndexEntry},
sync_queue, tenant_branch_files, update_index_description, SyncKind, SyncTask,
},
RemoteStorage, ZTenantTimelineId,
RemoteStorage, TimelineSyncId,
},
};
@@ -36,13 +36,12 @@ pub(super) async fn upload_timeline_checkpoint<
>(
config: &'static PageServerConf,
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
new_checkpoint: NewCheckpoint,
retries: u32,
) -> Option<bool> {
debug!("Uploading checkpoint for sync id {}", sync_id);
if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.tenant_id).await
{
if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.0).await {
error!(
"Failed to upload missing branches for sync id {}: {:?}",
sync_id, e
@@ -58,10 +57,7 @@ pub(super) async fn upload_timeline_checkpoint<
let index = &remote_assets.1;
let ZTenantTimelineId {
tenant_id,
timeline_id,
} = sync_id;
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);
let index_read = index.read().await;
@@ -155,14 +151,11 @@ async fn try_upload_checkpoint<
>(
config: &'static PageServerConf,
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
sync_id: ZTenantTimelineId,
sync_id: TimelineSyncId,
new_checkpoint: &NewCheckpoint,
files_to_skip: BTreeSet<PathBuf>,
) -> anyhow::Result<(ArchiveHeader, u64)> {
let ZTenantTimelineId {
tenant_id,
timeline_id,
} = sync_id;
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);
let files_to_upload = new_checkpoint
@@ -295,7 +288,7 @@ mod tests {
#[tokio::test]
async fn reupload_timeline() -> anyhow::Result<()> {
let repo_harness = RepoHarness::create("reupload_timeline")?;
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
repo_harness.conf,
@@ -491,7 +484,7 @@ mod tests {
#[tokio::test]
async fn reupload_timeline_rejected() -> anyhow::Result<()> {
let repo_harness = RepoHarness::create("reupload_timeline_rejected")?;
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
repo_harness.conf,

View File

@@ -306,12 +306,8 @@ pub enum ZenithWalRecord {
/// Native PostgreSQL WAL record
Postgres { will_init: bool, rec: Bytes },
/// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
ClearVisibilityMapFlags {
new_heap_blkno: Option<u32>,
old_heap_blkno: Option<u32>,
flags: u8,
},
/// Clear bits in heap visibility map. (heap blkno, flag bits to clear)
ClearVisibilityMapFlags { heap_blkno: u32, flags: u8 },
/// Mark transaction IDs as committed on a CLOG page
ClogSetCommitted { xids: Vec<TransactionId> },
/// Mark transaction IDs as aborted on a CLOG page

View File

@@ -332,11 +332,8 @@ impl VirtualFile {
// TODO: We could downgrade the locks to read mode before calling
// 'func', to allow a little bit more concurrency, but the standard
// library RwLock doesn't allow downgrading without releasing the lock,
// and that doesn't seem worth the trouble.
//
// XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
// may deadlock on subsequent read calls.
// Simply replacing all `RwLock`s in the project causes deadlocks, so use it sparingly.
// and that doesn't seem worth the trouble. (parking_lot RwLock would
// allow it)
let result = STORAGE_IO_TIME
.with_label_values(&[op, &self.tenantid, &self.timelineid])
.observe_closure_duration(|| func(&file));

View File

@@ -349,25 +349,49 @@ impl WalIngest {
decoded: &mut DecodedWALRecord,
) -> Result<()> {
// Handle VM bit updates that are implicitly part of heap records.
// First, look at the record to determine which VM bits need
// to be cleared. If either of these variables is set, we
// need to clear the corresponding bits in the visibility map.
let mut new_heap_blkno: Option<u32> = None;
let mut old_heap_blkno: Option<u32> = None;
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
if (xlrec.flags
& (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
| pg_constants::XLH_INSERT_ALL_FROZEN_SET))
!= 0
{
timeline.put_wal_record(
lsn,
RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
}),
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
ZenithWalRecord::ClearVisibilityMapFlags {
heap_blkno: decoded.blocks[0].blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = XlHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
timeline.put_wal_record(
lsn,
RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
}),
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
ZenithWalRecord::ClearVisibilityMapFlags {
heap_blkno: decoded.blocks[0].blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
@@ -376,15 +400,39 @@ impl WalIngest {
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
timeline.put_wal_record(
lsn,
RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
}),
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
ZenithWalRecord::ClearVisibilityMapFlags {
heap_blkno: decoded.blocks[0].blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0
&& decoded.blocks.len() > 1
{
timeline.put_wal_record(
lsn,
RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[1].rnode_spcnode,
dbnode: decoded.blocks[1].rnode_dbnode,
relnode: decoded.blocks[1].rnode_relnode,
}),
decoded.blocks[1].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
ZenithWalRecord::ClearVisibilityMapFlags {
heap_blkno: decoded.blocks[1].blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
@@ -400,67 +448,32 @@ impl WalIngest {
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
// FIXME: why also ALL_FROZEN_SET?
if (xlrec.flags
& (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
| pg_constants::XLH_INSERT_ALL_FROZEN_SET))
!= 0
{
timeline.put_wal_record(
lsn,
RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
}),
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
ZenithWalRecord::ClearVisibilityMapFlags {
heap_blkno: decoded.blocks[0].blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
}
}
// FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
// Clear the VM bits if required.
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
let vm_relish = RelishTag::Relation(RelTag {
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
});
let new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
let old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
if new_vm_blk == old_vm_blk {
// An UPDATE record that needs to clear the bits for both old and the
// new page, both of which reside on the same VM page.
timeline.put_wal_record(
lsn,
vm_relish,
new_vm_blk.unwrap(),
ZenithWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
} else {
// Clear VM bits for one heap page, or for two pages that reside on
// different VM pages.
if let Some(new_vm_blk) = new_vm_blk {
timeline.put_wal_record(
lsn,
vm_relish,
new_vm_blk,
ZenithWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno: None,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
if let Some(old_vm_blk) = old_vm_blk {
timeline.put_wal_record(
lsn,
vm_relish,
old_vm_blk,
ZenithWalRecord::ClearVisibilityMapFlags {
new_heap_blkno: None,
old_heap_blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
)?;
}
}
}
Ok(())
}
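
For reference, the heap-block-to-VM-block arithmetic assumed by the records above can be sketched as follows; the constants mirror PostgreSQL's visibilitymap.c and are stated here as assumptions rather than taken verbatim from pg_constants:

// Each heap block is tracked by two bits (all-visible, all-frozen) in the VM fork.
const BLCKSZ: u32 = 8192;
const MAXALIGN_SIZE_OF_PAGE_HEADER_DATA: u32 = 24;
const BITS_PER_HEAPBLOCK: u32 = 2;
const HEAPBLOCKS_PER_BYTE: u32 = 8 / BITS_PER_HEAPBLOCK;
const HEAPBLOCKS_PER_PAGE: u32 = (BLCKSZ - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA) * HEAPBLOCKS_PER_BYTE;

// VM block that carries the bits for a given heap block (the division used above).
fn heapblk_to_mapblock(heap_blkno: u32) -> u32 {
    heap_blkno / HEAPBLOCKS_PER_PAGE
}

// Byte within that VM block's contents area.
fn heapblk_to_mapbyte(heap_blkno: u32) -> u32 {
    (heap_blkno % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE
}

// Bit offset of this heap block's flags inside that byte.
fn heapblk_to_offset(heap_blkno: u32) -> u32 {
    (heap_blkno % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK
}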

View File

@@ -12,15 +12,14 @@ use crate::thread_mgr::ThreadKind;
use crate::walingest::WalIngest;
use anyhow::{bail, Context, Error, Result};
use bytes::BytesMut;
use fail::fail_point;
use lazy_static::lazy_static;
use parking_lot::Mutex;
use postgres_ffi::waldecoder::*;
use postgres_protocol::message::backend::ReplicationMessage;
use postgres_types::PgLsn;
use std::cell::Cell;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Mutex;
use std::thread_local;
use std::time::SystemTime;
use tokio::pin;
@@ -32,7 +31,6 @@ use zenith_utils::lsn::Lsn;
use zenith_utils::pq_proto::ZenithFeedback;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
//
// We keep one WAL Receiver active per timeline.
//
@@ -53,7 +51,7 @@ thread_local! {
}
fn drop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
let mut receivers = WAL_RECEIVERS.lock().unwrap();
let mut receivers = WAL_RECEIVERS.lock();
receivers.remove(&(tenantid, timelineid));
}
@@ -64,7 +62,7 @@ pub fn launch_wal_receiver(
timelineid: ZTimelineId,
wal_producer_connstr: &str,
) -> Result<()> {
let mut receivers = WAL_RECEIVERS.lock().unwrap();
let mut receivers = WAL_RECEIVERS.lock();
match receivers.get_mut(&(tenantid, timelineid)) {
Some(receiver) => {
@@ -97,7 +95,7 @@ pub fn launch_wal_receiver(
// Look up current WAL producer connection string in the hash table
fn get_wal_producer_connstr(tenantid: ZTenantId, timelineid: ZTimelineId) -> String {
let receivers = WAL_RECEIVERS.lock().unwrap();
let receivers = WAL_RECEIVERS.lock();
receivers
.get(&(tenantid, timelineid))
@@ -162,7 +160,7 @@ fn walreceiver_main(
// This is from tokio-postgres docs, but it is a bit weird in our case because we extensively use block_on
runtime.spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
eprintln!("connection error: {}", e);
}
});
@@ -256,8 +254,6 @@ fn walreceiver_main(
let writer = timeline.writer();
walingest.ingest_record(writer.as_ref(), recdata, lsn)?;
fail_point!("walreceiver-after-ingest");
last_rec_lsn = lsn;
}

View File

@@ -363,44 +363,25 @@ impl PostgresRedoManager {
will_init: _,
rec: _,
} => panic!("tried to pass postgres wal record to zenith WAL redo"),
ZenithWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
flags,
} => {
// sanity check that this is modifying the correct relish
ZenithWalRecord::ClearVisibilityMapFlags { heap_blkno, flags } => {
// Calculate the VM block and offset that corresponds to the heap block.
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(*heap_blkno);
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(*heap_blkno);
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(*heap_blkno);
// Check that we're modifying the correct VM block.
assert!(
check_forknum(&rel, pg_constants::VISIBILITYMAP_FORKNUM),
"ClearVisibilityMapFlags record on unexpected rel {:?}",
rel
);
if let Some(heap_blkno) = *new_heap_blkno {
// Calculate the VM block and offset that corresponds to the heap block.
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
assert!(map_block == blknum);
// Check that we're modifying the correct VM block.
assert!(map_block == blknum);
// equivalent to PageGetContents(page)
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
// equivalent to PageGetContents(page)
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
map[map_byte as usize] &= !(flags << map_offset);
}
// Repeat for 'old_heap_blkno', if any
if let Some(heap_blkno) = *old_heap_blkno {
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
assert!(map_block == blknum);
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
map[map_byte as usize] &= !(flags << map_offset);
}
let mask: u8 = flags << map_offset;
map[map_byte as usize] &= !mask;
}
// Non-relational WAL records are handled here, with custom code that has the
// same effects as the corresponding Postgres WAL redo function.
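
To make the redo step above concrete, here is a minimal sketch of the mask computation with two VM bits per heap block (values are illustrative):

// With flags = VISIBILITYMAP_VALID_BITS (0b11) and map_offset = 4, the mask is
// 0b0011_0000 and only that heap block's two bits are cleared.
fn clear_vm_bits(map: &mut [u8], map_byte: usize, map_offset: u32, flags: u8) {
    let mask: u8 = flags << map_offset;
    map[map_byte] &= !mask;
}

// Example: a byte of 0b1111_1111 becomes 0b1100_1111 after
// clear_vm_bits(&mut map, 0, 4, 0b11); neighbouring heap blocks keep their bits.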

View File

@@ -1,8 +1,11 @@
[package]
name = "postgres_ffi"
version = "0.1.0"
authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
chrono = "0.4.19"
rand = "0.8.3"

View File

@@ -1,32 +1,38 @@
[package]
name = "proxy"
version = "0.1.0"
authors = ["Stas Kelvich <stas.kelvich@gmail.com>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
bytes = { version = "1.0.1", features = ['serde'] }
clap = "3.0"
futures = "0.3.13"
hashbrown = "0.11.2"
hex = "0.4.3"
hyper = "0.14"
lazy_static = "1.4.0"
md5 = "0.7.0"
parking_lot = "0.11.2"
pin-project-lite = "0.2.7"
rand = "0.8.3"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
rustls = "0.19.1"
scopeguard = "1.1.0"
hex = "0.4.3"
hyper = "0.14"
routerify = "2"
parking_lot = "0.11.2"
hashbrown = "0.11.2"
serde = "1"
serde_json = "1"
tokio = { version = "1.11", features = ["macros"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-rustls = "0.22.0"
clap = "2.33.0"
rustls = "0.19.1"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
pin-project-lite = "0.2.7"
futures = "0.3.13"
scopeguard = "1.1.0"
zenith_utils = { path = "../zenith_utils" }
zenith_metrics = { path = "../zenith_metrics" }
base64 = "0.13.0"
async-trait = "0.1.52"
[dev-dependencies]
tokio-postgres-rustls = "0.8.0"

View File

@@ -1,169 +1,41 @@
use crate::compute::DatabaseInfo;
use crate::config::ProxyConfig;
use crate::cplane_api::{self, CPlaneApi};
use crate::db::AuthSecret;
use crate::stream::PqStream;
use anyhow::{anyhow, bail, Context};
use std::collections::HashMap;
use bytes::Bytes;
use tokio::io::{AsyncRead, AsyncWrite};
use zenith_utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage, FeMessage as Fe};
use zenith_utils::pq_proto::BeMessage as Be;
/// Various client credentials which we use for authentication.
#[derive(Debug, PartialEq, Eq)]
pub struct ClientCredentials {
pub user: String,
pub dbname: String,
/// Stored secret for authenticating the user via md5 but authenticating
/// to the compute database with a (possibly different) plaintext password.
pub struct PlaintextStoredSecret {
pub salt: [u8; 4],
pub hashed_salted_password: Bytes,
pub compute_db_password: String,
}
impl TryFrom<HashMap<String, String>> for ClientCredentials {
type Error = anyhow::Error;
/// Sufficient information to auth user and create AuthSecret
#[non_exhaustive]
pub enum StoredSecret {
PlaintextPassword(PlaintextStoredSecret),
// TODO add md5 option?
// TODO add SCRAM option
}
fn try_from(mut value: HashMap<String, String>) -> Result<Self, Self::Error> {
let mut get_param = |key| {
value
.remove(key)
.with_context(|| format!("{} is missing in startup packet", key))
};
let user = get_param("user")?;
let db = get_param("database")?;
Ok(Self { user, dbname: db })
pub async fn authenticate(
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
stored_secret: StoredSecret
) -> anyhow::Result<AuthSecret> {
match stored_secret {
StoredSecret::PlaintextPassword(stored) => {
client.write_message(&Be::AuthenticationMD5Password(&stored.salt)).await?;
let provided = client.read_password_message().await?;
anyhow::ensure!(provided == stored.hashed_salted_password);
Ok(AuthSecret::Password(stored.compute_db_password))
},
}
}
impl ClientCredentials {
/// Use credentials to authenticate the user.
pub async fn authenticate(
self,
config: &ProxyConfig,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> anyhow::Result<DatabaseInfo> {
use crate::config::ClientAuthMethod::*;
use crate::config::RouterConfig::*;
let db_info = match &config.router_config {
Static { host, port } => handle_static(host.clone(), *port, client, self).await,
Dynamic(Mixed) => {
if self.user.ends_with("@zenith") {
handle_existing_user(config, client, self).await
} else {
handle_new_user(config, client).await
}
}
Dynamic(Password) => handle_existing_user(config, client, self).await,
Dynamic(Link) => handle_new_user(config, client).await,
};
db_info.context("failed to authenticate client")
}
}
fn new_psql_session_id() -> String {
hex::encode(rand::random::<[u8; 8]>())
}
async fn handle_static(
host: String,
port: u16,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
creds: ClientCredentials,
) -> anyhow::Result<DatabaseInfo> {
client
.write_message(&Be::AuthenticationCleartextPassword)
.await?;
// Read client's password bytes
let msg = match client.read_message().await? {
Fe::PasswordMessage(msg) => msg,
bad => bail!("unexpected message type: {:?}", bad),
};
let cleartext_password = std::str::from_utf8(&msg)?.split('\0').next().unwrap();
let db_info = DatabaseInfo {
host,
port,
dbname: creds.dbname.clone(),
user: creds.user.clone(),
password: Some(cleartext_password.into()),
};
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
Ok(db_info)
}
async fn handle_existing_user(
config: &ProxyConfig,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
creds: ClientCredentials,
) -> anyhow::Result<DatabaseInfo> {
let psql_session_id = new_psql_session_id();
let md5_salt = rand::random();
client
.write_message(&Be::AuthenticationMD5Password(&md5_salt))
.await?;
// Read client's password hash
let msg = match client.read_message().await? {
Fe::PasswordMessage(msg) => msg,
bad => bail!("unexpected message type: {:?}", bad),
};
let (_trailing_null, md5_response) = msg
.split_last()
.ok_or_else(|| anyhow!("unexpected password message"))?;
let cplane = CPlaneApi::new(&config.auth_endpoint);
let db_info = cplane
.authenticate_proxy_request(creds, md5_response, &md5_salt, &psql_session_id)
.await?;
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
Ok(db_info)
}
async fn handle_new_user(
config: &ProxyConfig,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> anyhow::Result<DatabaseInfo> {
let psql_session_id = new_psql_session_id();
let greeting = hello_message(&config.redirect_uri, &psql_session_id);
let db_info = cplane_api::with_waiter(psql_session_id, |waiter| async {
// Give user a URL to spawn a new database
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?
.write_message(&Be::NoticeResponse(greeting))
.await?;
// Wait for web console response
waiter.await?.map_err(|e| anyhow!(e))
})
.await?;
client.write_message_noflush(&Be::NoticeResponse("Connecting to database.".into()))?;
Ok(db_info)
}
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
format!(
concat![
"☀️ Welcome to Zenith!\n",
"To proceed with database creation, open the following link:\n\n",
" {redirect_uri}{session_id}\n\n",
"It needs to be done once and we will send you '.pgpass' file,\n",
"which will allow you to access or create ",
"databases without opening your web browser."
],
redirect_uri = redirect_uri,
session_id = session_id,
)
#[async_trait::async_trait]
pub trait SecretStore {
async fn get_stored_secret(&self, creds: &crate::cplane_api::ClientCredentials) -> anyhow::Result<StoredSecret>;
}
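
For context on the `PlaintextPassword` arm above: the bytes a client returns for `AuthenticationMD5Password` follow the standard libpq scheme, which can be sketched like this (the `md5` crate is already a proxy dependency; the helper name is hypothetical):

// "md5" + hex(md5(hex(md5(password + user)) + salt)); the client also appends a
// trailing NUL on the wire, which matches the trailing "\0" in the mock's stored value.
fn md5_password_response(user: &str, password: &str, salt: [u8; 4]) -> String {
    let inner = format!("{:x}", md5::compute(format!("{}{}", password, user)));
    let mut salted = inner.into_bytes();
    salted.extend_from_slice(&salt);
    format!("md5{:x}", md5::compute(&salted))
}

Comparing the received bytes against `hashed_salted_password` is what the `ensure!` call does before the plaintext `compute_db_password` is handed to the compute connection.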

View File

@@ -1,57 +1,15 @@
use anyhow::{anyhow, Context};
use hashbrown::HashMap;
use lazy_static::lazy_static;
use parking_lot::Mutex;
use std::net::SocketAddr;
use tokio::net::TcpStream;
use tokio_postgres::{CancelToken, NoTls};
use zenith_utils::pq_proto::CancelKeyData;
/// Enables serving CancelRequests.
#[derive(Default)]
pub struct CancelMap(Mutex<HashMap<CancelKeyData, Option<CancelClosure>>>);
impl CancelMap {
/// Cancel a running query for the corresponding connection.
pub async fn cancel_session(&self, key: CancelKeyData) -> anyhow::Result<()> {
let cancel_closure = self
.0
.lock()
.get(&key)
.and_then(|x| x.clone())
.with_context(|| format!("unknown session: {:?}", key))?;
cancel_closure.try_cancel_query().await
}
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
pub async fn with_session<'a, F, R, V>(&'a self, f: F) -> anyhow::Result<V>
where
F: FnOnce(Session<'a>) -> R,
R: std::future::Future<Output = anyhow::Result<V>>,
{
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
// expose it and we don't want to do another roundtrip to query
// for it. The client will be able to notice that this is not the
// actual backend_pid, but backend_pid is not used for anything
// so it doesn't matter.
let key = rand::random();
// Random key collisions are unlikely to happen here, but they're still possible,
// which is why we have to take care not to rewrite an existing key.
self.0
.lock()
.try_insert(key, None)
.map_err(|_| anyhow!("session already exists: {:?}", key))?;
// This will guarantee that the session gets dropped
// as soon as the future is finished.
scopeguard::defer! {
self.0.lock().remove(&key);
}
let session = Session::new(key, self);
f(session).await
}
lazy_static! {
/// Enables serving CancelRequests.
static ref CANCEL_MAP: Mutex<HashMap<CancelKeyData, Option<CancelClosure>>> = Default::default();
}
/// This should've been a [`std::future::Future`], but
@@ -80,27 +38,53 @@ impl CancelClosure {
}
}
/// Cancel a running query for the corresponding connection.
pub async fn cancel_session(key: CancelKeyData) -> anyhow::Result<()> {
let cancel_closure = CANCEL_MAP
.lock()
.get(&key)
.and_then(|x| x.clone())
.with_context(|| format!("unknown session: {:?}", key))?;
cancel_closure.try_cancel_query().await
}
/// Helper for registering query cancellation tokens.
pub struct Session<'a> {
/// The user-facing key identifying this session.
key: CancelKeyData,
/// The [`CancelMap`] this session belongs to.
cancel_map: &'a CancelMap,
}
impl<'a> Session<'a> {
fn new(key: CancelKeyData, cancel_map: &'a CancelMap) -> Self {
Self { key, cancel_map }
}
pub struct Session(CancelKeyData);
impl Session {
/// Store the cancel token for the given session.
/// This enables query cancellation in [`crate::proxy::handshake`].
pub fn enable_cancellation(self, cancel_closure: CancelClosure) -> CancelKeyData {
self.cancel_map
.0
.lock()
.insert(self.key, Some(cancel_closure));
self.key
CANCEL_MAP.lock().insert(self.0, Some(cancel_closure));
self.0
}
}
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
pub async fn with_session<F, R, V>(f: F) -> anyhow::Result<V>
where
F: FnOnce(Session) -> R,
R: std::future::Future<Output = anyhow::Result<V>>,
{
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
// expose it and we don't want to do another roundtrip to query
// for it. The client will be able to notice that this is not the
// actual backend_pid, but backend_pid is not used for anything
// so it doesn't matter.
let key = rand::random();
// Birthday-style key collisions are unlikely here, but still possible
CANCEL_MAP
.lock()
.try_insert(key, None)
.map_err(|_| anyhow!("session already exists: {:?}", key))?;
// This will guarantee that the session gets dropped
// as soon as the future is finished.
scopeguard::defer! {
CANCEL_MAP.lock().remove(&key);
}
let session = Session(key);
f(session).await
}
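
A minimal usage sketch of `with_session` as defined above, mirroring how proxy.rs drives it (the closure body is a placeholder; the `crate::cancellation` path is assumed):

async fn serve_one_client() -> anyhow::Result<()> {
    cancellation::with_session(|session| async move {
        // Handshake and connect to the compute node here, then register the
        // cancel closure so a later CancelRequest can find this connection:
        // let cancel_key_data = session.enable_cancellation(cancel_closure);
        let _ = session;
        Ok::<_, anyhow::Error>(())
    })
    .await
}

The scopeguard inside `with_session` removes the key again as soon as the future finishes, so stale entries cannot accumulate in CANCEL_MAP.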

View File

@@ -1,42 +1,7 @@
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::net::{SocketAddr, ToSocketAddrs};
use crate::{cplane_api::ClientCredentials, db::DatabaseConnInfo};
/// Compute node connection params.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
pub password: Option<String>,
}
impl DatabaseInfo {
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
let host_port = format!("{}:{}", self.host, self.port);
host_port
.to_socket_addrs()
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
.next()
.context("cannot resolve at least one SocketAddr")
}
}
impl From<DatabaseInfo> for tokio_postgres::Config {
fn from(db_info: DatabaseInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
config
}
#[async_trait::async_trait]
pub trait ComputeProvider {
async fn get_compute_node(&self, creds: &ClientCredentials) -> anyhow::Result<DatabaseConnInfo>;
}

View File

@@ -1,79 +1,42 @@
use crate::auth::ClientCredentials;
use crate::compute::DatabaseInfo;
use crate::waiters::{Waiter, Waiters};
use anyhow::{anyhow, bail};
use lazy_static::lazy_static;
use anyhow::{anyhow, bail, Context};
use serde::{Deserialize, Serialize};
use std::net::{SocketAddr, ToSocketAddrs};
use std::collections::HashMap;
lazy_static! {
static ref CPLANE_WAITERS: Waiters<Result<DatabaseInfo, String>> = Default::default();
use crate::state::ProxyWaiters;
#[derive(Debug, PartialEq, Eq)]
pub struct ClientCredentials {
pub user: String,
pub dbname: String,
}
/// Give caller an opportunity to wait for cplane's reply.
pub async fn with_waiter<F, R, T>(psql_session_id: impl Into<String>, f: F) -> anyhow::Result<T>
where
F: FnOnce(Waiter<'static, Result<DatabaseInfo, String>>) -> R,
R: std::future::Future<Output = anyhow::Result<T>>,
{
let waiter = CPLANE_WAITERS.register(psql_session_id.into())?;
f(waiter).await
}
impl TryFrom<HashMap<String, String>> for ClientCredentials {
type Error = anyhow::Error;
pub fn notify(psql_session_id: &str, msg: Result<DatabaseInfo, String>) -> anyhow::Result<()> {
CPLANE_WAITERS.notify(psql_session_id, msg)
}
fn try_from(mut value: HashMap<String, String>) -> Result<Self, Self::Error> {
let mut get_param = |key| {
value
.remove(key)
.with_context(|| format!("{} is missing in startup packet", key))
};
/// Zenith console API wrapper.
pub struct CPlaneApi<'a> {
auth_endpoint: &'a str,
}
let user = get_param("user")?;
let db = get_param("database")?;
impl<'a> CPlaneApi<'a> {
pub fn new(auth_endpoint: &'a str) -> Self {
Self { auth_endpoint }
Ok(Self { user, dbname: db })
}
}
impl CPlaneApi<'_> {
pub async fn authenticate_proxy_request(
&self,
creds: ClientCredentials,
md5_response: &[u8],
salt: &[u8; 4],
psql_session_id: &str,
) -> anyhow::Result<DatabaseInfo> {
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
url.query_pairs_mut()
.append_pair("login", &creds.user)
.append_pair("database", &creds.dbname)
.append_pair("md5response", std::str::from_utf8(md5_response)?)
.append_pair("salt", &hex::encode(salt))
.append_pair("psql_session_id", psql_session_id);
with_waiter(psql_session_id, |waiter| async {
println!("cplane request: {}", url);
// TODO: leverage `reqwest::Client` to reuse connections
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
bail!("Auth failed: {}", resp.status())
}
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text().await?.as_str())?;
println!("got auth info: #{:?}", auth_info);
use ProxyAuthResponse::*;
match auth_info {
Ready { conn_info } => Ok(conn_info),
Error { error } => bail!(error),
NotReady { .. } => waiter.await?.map_err(|e| anyhow!(e)),
}
})
.await
}
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
pub password: Option<String>,
}
// NOTE: the order of constructors is important.
// https://serde.rs/enum-representations.html#untagged
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
enum ProxyAuthResponse {
@@ -82,6 +45,57 @@ enum ProxyAuthResponse {
NotReady { ready: bool }, // TODO: get rid of `ready`
}
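
The ordering note above matters because `#[serde(untagged)]` tries the variants top to bottom and keeps the first one that deserializes; a self-contained sketch of the pitfall (variant and field types are illustrative, not the console's real schema):

use serde::Deserialize;

#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Reply {
    // Must stay ahead of `NotReady`: a ready reply that also carries a `ready`
    // flag would otherwise match the less specific variant first.
    Ready { conn_info: String },
    Error { error: String },
    NotReady { ready: bool },
}

fn main() {
    let raw = r#"{"ready": true, "conn_info": "compute-1:5432"}"#;
    // Prints Ready { .. }; swap the variant order and the same payload silently
    // deserializes as NotReady { ready: true }, dropping conn_info.
    println!("{:?}", serde_json::from_str::<Reply>(raw).unwrap());
}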
pub struct CPlaneApi<'a> {
auth_endpoint: &'a str,
waiters: &'a ProxyWaiters,
}
impl<'a> CPlaneApi<'a> {
pub fn new(auth_endpoint: &'a str, waiters: &'a ProxyWaiters) -> Self {
Self {
auth_endpoint,
waiters,
}
}
}
impl CPlaneApi<'_> {
pub fn authenticate_proxy_request(
&self,
user: &str,
database: &str,
md5_response: &[u8],
salt: &[u8; 4],
psql_session_id: &str,
) -> anyhow::Result<DatabaseInfo> {
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
url.query_pairs_mut()
.append_pair("login", user)
.append_pair("database", database)
.append_pair("md5response", std::str::from_utf8(md5_response)?)
.append_pair("salt", &hex::encode(salt))
.append_pair("psql_session_id", psql_session_id);
let waiter = self.waiters.register(psql_session_id.to_owned());
println!("cplane request: {}", url);
let resp = reqwest::blocking::get(url)?;
if !resp.status().is_success() {
bail!("Auth failed: {}", resp.status())
}
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text()?.as_str())?;
println!("got auth info: #{:?}", auth_info);
use ProxyAuthResponse::*;
match auth_info {
Ready { conn_info } => Ok(conn_info),
Error { error } => bail!(error),
NotReady { .. } => waiter.wait()?.map_err(|e| anyhow!(e)),
}
}
}
#[cfg(test)]
mod tests {
use super::*;

58
proxy/src/db.rs Normal file
View File

@@ -0,0 +1,58 @@
///
/// Utils for connecting to the postgres database.
///
use std::net::{SocketAddr, ToSocketAddrs};
use anyhow::{Context, anyhow};
use crate::cplane_api::ClientCredentials;
pub struct DatabaseConnInfo {
pub host: String,
pub port: u16,
}
pub struct DatabaseAuthInfo {
pub conn_info: DatabaseConnInfo,
pub creds: ClientCredentials,
pub auth_secret: AuthSecret,
}
/// Sufficient information to auth with database
#[non_exhaustive]
#[derive(Debug)]
pub enum AuthSecret {
Password(String),
// TODO add SCRAM option
}
impl From<DatabaseAuthInfo> for tokio_postgres::Config {
fn from(auth_info: DatabaseAuthInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&auth_info.conn_info.host)
.port(auth_info.conn_info.port)
.dbname(&auth_info.creds.dbname)
.user(&auth_info.creds.user);
match auth_info.auth_secret {
AuthSecret::Password(password) => {
config.password(password);
}
}
config
}
}
impl DatabaseConnInfo {
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
let host_port = format!("{}:{}", self.host, self.port);
host_port
.to_socket_addrs()
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
.next()
.ok_or_else(|| anyhow!("cannot resolve at least one SocketAddr"))
}
}
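
A short sketch of how the pieces above come together before dialing the compute node (values are placeholders; imports assume the surrounding proxy crate layout):

use crate::cplane_api::ClientCredentials;
use crate::db::{AuthSecret, DatabaseAuthInfo, DatabaseConnInfo};

fn example_compute_config() -> tokio_postgres::Config {
    let auth_info = DatabaseAuthInfo {
        conn_info: DatabaseConnInfo { host: "127.0.0.1".into(), port: 5432 },
        creds: ClientCredentials { user: "postgres".into(), dbname: "postgres".into() },
        auth_secret: AuthSecret::Password("postgres".into()),
    };
    // socket_addr() is resolved separately for the raw TcpStream; the Config
    // carries the dbname, user and password for the startup packet.
    tokio_postgres::Config::from(auth_info)
}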

View File

@@ -1,10 +1,10 @@
use anyhow::anyhow;
use hyper::{Body, Request, Response, StatusCode};
use routerify::RouterBuilder;
use std::net::TcpListener;
use zenith_utils::http::endpoint;
use zenith_utils::http::error::ApiError;
use zenith_utils::http::json::json_response;
use zenith_utils::http::{RouterBuilder, RouterService};
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
Ok(json_response(StatusCode::OK, "")?)
@@ -16,11 +16,7 @@ fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
}
pub async fn thread_main(http_listener: TcpListener) -> anyhow::Result<()> {
scopeguard::defer! {
println!("http has shut down");
}
let service = || RouterService::new(make_router().build()?);
let service = || routerify::RouterService::new(make_router().build()?);
hyper::Server::from_tcp(http_listener)?
.serve(service().map_err(|e| anyhow!(e))?)

View File

@@ -5,162 +5,126 @@
/// (control plane API in our case) and can create new databases and accounts
/// in somewhat transparent manner (again via communication with control plane API).
///
use anyhow::{bail, Context};
use anyhow::bail;
use clap::{App, Arg};
use config::ProxyConfig;
use futures::FutureExt;
use std::future::Future;
use tokio::{net::TcpListener, task::JoinError};
use zenith_utils::GIT_VERSION;
use state::{ProxyConfig, ProxyState};
use zenith_utils::{tcp_listener, GIT_VERSION};
use crate::config::{ClientAuthMethod, RouterConfig};
mod auth;
mod cancellation;
mod compute;
mod config;
mod mock;
mod auth;
mod db;
mod cancellation;
mod cplane_api;
mod http;
mod mgmt;
mod proxy;
mod state;
mod stream;
mod waiters;
/// Flattens Result<Result<T>> into Result<T>.
async fn flatten_err(
f: impl Future<Output = Result<anyhow::Result<()>, JoinError>>,
) -> anyhow::Result<()> {
f.map(|r| r.context("join error").and_then(|x| x)).await
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
zenith_metrics::set_common_metrics_prefix("zenith_proxy");
let arg_matches = App::new("Zenith proxy/router")
.version(GIT_VERSION)
.arg(
Arg::new("proxy")
.short('p')
Arg::with_name("proxy")
.short("p")
.long("proxy")
.takes_value(true)
.help("listen for incoming client connections on ip:port")
.default_value("127.0.0.1:4432"),
)
.arg(
Arg::new("auth-method")
.long("auth-method")
.takes_value(true)
.help("Possible values: password | link | mixed")
.default_value("mixed"),
)
.arg(
Arg::new("static-router")
.short('s')
.long("static-router")
.takes_value(true)
.help("Route all clients to host:port"),
)
.arg(
Arg::new("mgmt")
.short('m')
Arg::with_name("mgmt")
.short("m")
.long("mgmt")
.takes_value(true)
.help("listen for management callback connection on ip:port")
.default_value("127.0.0.1:7000"),
)
.arg(
Arg::new("http")
.short('h')
Arg::with_name("http")
.short("h")
.long("http")
.takes_value(true)
.help("listen for incoming http connections (metrics, etc) on ip:port")
.default_value("127.0.0.1:7001"),
)
.arg(
Arg::new("uri")
.short('u')
Arg::with_name("uri")
.short("u")
.long("uri")
.takes_value(true)
.help("redirect unauthenticated users to given uri")
.default_value("http://localhost:3000/psql_session/"),
)
.arg(
Arg::new("auth-endpoint")
.short('a')
Arg::with_name("auth-endpoint")
.short("a")
.long("auth-endpoint")
.takes_value(true)
.help("API endpoint for authenticating users")
.default_value("http://localhost:3000/authenticate_proxy_request/"),
)
.arg(
Arg::new("ssl-key")
.short('k')
Arg::with_name("ssl-key")
.short("k")
.long("ssl-key")
.takes_value(true)
.help("path to SSL key for client postgres connections"),
)
.arg(
Arg::new("ssl-cert")
.short('c')
Arg::with_name("ssl-cert")
.short("c")
.long("ssl-cert")
.takes_value(true)
.help("path to SSL cert for client postgres connections"),
)
.get_matches();
let tls_config = match (
let ssl_config = match (
arg_matches.value_of("ssl-key"),
arg_matches.value_of("ssl-cert"),
) {
(Some(key_path), Some(cert_path)) => Some(config::configure_ssl(key_path, cert_path)?),
(Some(key_path), Some(cert_path)) => {
Some(crate::state::configure_ssl(key_path, cert_path)?)
}
(None, None) => None,
_ => bail!("either both or neither ssl-key and ssl-cert must be specified"),
};
let auth_method = arg_matches.value_of("auth-method").unwrap().parse()?;
let router_config = match arg_matches.value_of("static-router") {
None => RouterConfig::Dynamic(auth_method),
Some(addr) => {
if let ClientAuthMethod::Password = auth_method {
let (host, port) = addr.split_once(":").unwrap();
RouterConfig::Static {
host: host.to_string(),
port: port.parse().unwrap(),
}
} else {
bail!("static-router requires --auth-method password")
}
}
};
let config: &ProxyConfig = Box::leak(Box::new(ProxyConfig {
router_config,
let config = ProxyConfig {
proxy_address: arg_matches.value_of("proxy").unwrap().parse()?,
mgmt_address: arg_matches.value_of("mgmt").unwrap().parse()?,
http_address: arg_matches.value_of("http").unwrap().parse()?,
redirect_uri: arg_matches.value_of("uri").unwrap().parse()?,
auth_endpoint: arg_matches.value_of("auth-endpoint").unwrap().parse()?,
tls_config,
}));
ssl_config,
};
let state: &ProxyState = Box::leak(Box::new(ProxyState::new(config)));
println!("Version: {}", GIT_VERSION);
// Check that we can bind to address before further initialization
println!("Starting http on {}", config.http_address);
let http_listener = TcpListener::bind(config.http_address).await?.into_std()?;
println!("Starting http on {}", state.conf.http_address);
let http_listener = tcp_listener::bind(state.conf.http_address)?;
println!("Starting mgmt on {}", config.mgmt_address);
let mgmt_listener = TcpListener::bind(config.mgmt_address).await?.into_std()?;
println!("Starting proxy on {}", state.conf.proxy_address);
let proxy_listener = tokio::net::TcpListener::bind(state.conf.proxy_address).await?;
println!("Starting proxy on {}", config.proxy_address);
let proxy_listener = TcpListener::bind(config.proxy_address).await?;
println!("Starting mgmt on {}", state.conf.mgmt_address);
let mgmt_listener = tcp_listener::bind(state.conf.mgmt_address)?;
let http = tokio::spawn(http::thread_main(http_listener));
let proxy = tokio::spawn(proxy::thread_main(config, proxy_listener));
let mgmt = tokio::task::spawn_blocking(move || mgmt::thread_main(mgmt_listener));
let proxy = tokio::spawn(proxy::thread_main(state, proxy_listener));
let mgmt = tokio::task::spawn_blocking(move || mgmt::thread_main(state, mgmt_listener));
let tasks = [flatten_err(http), flatten_err(proxy), flatten_err(mgmt)];
let _: Vec<()> = futures::future::try_join_all(tasks).await?;
let _ = futures::future::try_join_all([http, proxy, mgmt])
.await?
.into_iter()
.collect::<Result<Vec<()>, _>>()?;
Ok(())
}
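
The `flatten_err` helper above exists because `tokio::spawn` wraps each task's own result in a `Result<_, JoinError>`; a minimal sketch of the shape it collapses:

// Without flatten_err the await yields Result<anyhow::Result<()>, JoinError>;
// with it, a JoinError is folded into the anyhow error chain as "join error".
async fn spawn_and_wait() -> anyhow::Result<()> {
    let task = tokio::spawn(async { Ok::<(), anyhow::Error>(()) });
    flatten_err(task).await
}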

View File

@@ -1,49 +1,44 @@
use crate::{compute::DatabaseInfo, cplane_api};
use anyhow::Context;
use serde::Deserialize;
use std::{
net::{TcpListener, TcpStream},
thread,
};
use serde::Deserialize;
use zenith_utils::{
postgres_backend::{self, AuthType, PostgresBackend},
pq_proto::{BeMessage, SINGLE_COL_ROWDESC},
};
use crate::{cplane_api::DatabaseInfo, ProxyState};
///
/// Main proxy listener loop.
///
/// Listens for connections, and launches a new handler thread for each.
///
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
scopeguard::defer! {
println!("mgmt has shut down");
}
listener
.set_nonblocking(false)
.context("failed to set listener to blocking")?;
pub fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow::Result<()> {
loop {
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
let (socket, peer_addr) = listener.accept()?;
println!("accepted connection from {}", peer_addr);
socket
.set_nodelay(true)
.context("failed to set client socket option")?;
socket.set_nodelay(true).unwrap();
thread::spawn(move || {
if let Err(err) = handle_connection(socket) {
if let Err(err) = handle_connection(state, socket) {
println!("error: {}", err);
}
});
}
}
fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
fn handle_connection(state: &ProxyState, socket: TcpStream) -> anyhow::Result<()> {
let mut conn_handler = MgmtHandler { state };
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, true)?;
pgbackend.run(&mut MgmtHandler)
pgbackend.run(&mut conn_handler)
}
struct MgmtHandler;
struct MgmtHandler<'a> {
state: &'a ProxyState,
}
/// Serialized examples:
// {
@@ -79,13 +74,13 @@ enum PsqlSessionResult {
Failure(String),
}
impl postgres_backend::Handler for MgmtHandler {
impl postgres_backend::Handler for MgmtHandler<'_> {
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
query_string: &str,
) -> anyhow::Result<()> {
let res = try_process_query(pgb, query_string);
let res = try_process_query(self, pgb, query_string);
// intercept and log error message
if res.is_err() {
println!("Mgmt query failed: #{:?}", res);
@@ -94,7 +89,11 @@ impl postgres_backend::Handler for MgmtHandler {
}
}
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
fn try_process_query(
mgmt: &mut MgmtHandler,
pgb: &mut PostgresBackend,
query_string: &str,
) -> anyhow::Result<()> {
println!("Got mgmt query: '{}'", query_string);
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
@@ -105,7 +104,7 @@ fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::R
Failure(message) => Err(message),
};
match cplane_api::notify(&resp.session_id, msg) {
match mgmt.state.waiters.notify(&resp.session_id, msg) {
Ok(()) => {
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(b"ok")]))?

32
proxy/src/mock.rs Normal file
View File

@@ -0,0 +1,32 @@
use bytes::Bytes;
use crate::{auth::{PlaintextStoredSecret, SecretStore, StoredSecret}, compute::ComputeProvider, cplane_api::ClientCredentials, db::DatabaseConnInfo};
pub struct MockConsole {
}
#[async_trait::async_trait]
impl SecretStore for MockConsole {
async fn get_stored_secret(&self, creds: &ClientCredentials) -> anyhow::Result<StoredSecret> {
let salt = [0; 4];
match (&creds.user[..], &creds.dbname[..]) {
("postgres", "postgres") => Ok(StoredSecret::PlaintextPassword(PlaintextStoredSecret {
salt,
hashed_salted_password: "md52fff09cd9def51601fc5445943b3a11f\0".into(),
compute_db_password: "postgres".into(),
})),
_ => unimplemented!()
}
}
}
#[async_trait::async_trait]
impl ComputeProvider for MockConsole{
async fn get_compute_node(&self, creds: &ClientCredentials) -> anyhow::Result<DatabaseConnInfo> {
return Ok(DatabaseConnInfo {
host: "127.0.0.1".into(),
port: 5432,
})
}
}
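
A hypothetical test showing how the mock could be exercised on its own (not part of this patch; imports assume the surrounding proxy crate):

use crate::compute::ComputeProvider;
use crate::cplane_api::ClientCredentials;
use crate::mock::MockConsole;

#[tokio::test]
async fn mock_console_points_at_local_postgres() -> anyhow::Result<()> {
    let console = MockConsole {};
    let creds = ClientCredentials { user: "postgres".into(), dbname: "postgres".into() };
    let node = console.get_compute_node(&creds).await?;
    assert_eq!((node.host.as_str(), node.port), ("127.0.0.1", 5432));
    Ok(())
}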

View File

@@ -1,11 +1,14 @@
use crate::auth;
use crate::cancellation::{self, CancelClosure, CancelMap};
use crate::compute::DatabaseInfo;
use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MetricsStream, PqStream, Stream};
use crate::auth::{self, StoredSecret, SecretStore};
use crate::cancellation::{self, CancelClosure};
use crate::compute::ComputeProvider;
use crate::cplane_api as cplane;
use crate::db::{AuthSecret, DatabaseAuthInfo};
use crate::mock::MockConsole;
use crate::state::SslConfig;
use crate::stream::{PqStream, Stream};
use crate::ProxyState;
use anyhow::{bail, Context};
use lazy_static::lazy_static;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::net::TcpStream;
use tokio_postgres::NoTls;
@@ -30,44 +33,38 @@ lazy_static! {
.unwrap();
}
pub async fn thread_main(
state: &'static ProxyState,
listener: tokio::net::TcpListener,
) -> anyhow::Result<()> {
loop {
let (socket, peer_addr) = listener.accept().await?;
println!("accepted connection from {}", peer_addr);
tokio::spawn(log_error(async {
socket
.set_nodelay(true)
.context("failed to set socket option")?;
let tls = state.conf.ssl_config.clone();
handle_client(socket, tls).await
}));
}
}
async fn log_error<R, F>(future: F) -> F::Output
where
F: std::future::Future<Output = anyhow::Result<R>>,
{
future.await.map_err(|err| {
println!("error: {}", err);
println!("error: {}", err.to_string());
err
})
}
pub async fn thread_main(
config: &'static ProxyConfig,
listener: tokio::net::TcpListener,
) -> anyhow::Result<()> {
scopeguard::defer! {
println!("proxy has shut down");
}
let cancel_map = Arc::new(CancelMap::default());
loop {
let (socket, peer_addr) = listener.accept().await?;
println!("accepted connection from {}", peer_addr);
let cancel_map = Arc::clone(&cancel_map);
tokio::spawn(log_error(async move {
socket
.set_nodelay(true)
.context("failed to set socket option")?;
handle_client(config, &cancel_map, socket).await
}));
}
}
async fn handle_client(
config: &ProxyConfig,
cancel_map: &CancelMap,
stream: impl AsyncRead + AsyncWrite + Unpin,
tls: Option<SslConfig>,
) -> anyhow::Result<()> {
// The `closed` counter will increase when this future is destroyed.
NUM_CONNECTIONS_ACCEPTED_COUNTER.inc();
@@ -75,13 +72,11 @@ async fn handle_client(
NUM_CONNECTIONS_CLOSED_COUNTER.inc();
}
let tls = config.tls_config.clone();
if let Some((client, creds)) = handshake(stream, tls, cancel_map).await? {
cancel_map
.with_session(|session| async {
connect_client_to_db(config, session, client, creds).await
})
.await?;
if let Some((stream, creds)) = handshake(stream, tls).await? {
cancellation::with_session(|session| async {
connect_client_to_db(stream, creds, session).await
})
.await?;
}
Ok(())
@@ -92,9 +87,8 @@ async fn handle_client(
/// any object satisfying the traits.
async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
mut tls: Option<TlsConfig>,
cancel_map: &CancelMap,
) -> anyhow::Result<Option<(PqStream<Stream<S>>, auth::ClientCredentials)>> {
mut tls: Option<SslConfig>,
) -> anyhow::Result<Option<(PqStream<Stream<S>>, cplane::ClientCredentials)>> {
// Client may try upgrading to each protocol only once
let (mut tried_ssl, mut tried_gss) = (false, false);
@@ -142,7 +136,7 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
break Ok(Some((stream, params.try_into()?)));
}
CancelRequest(cancel_key_data) => {
cancel_map.cancel_session(cancel_key_data).await?;
cancellation::cancel_session(cancel_key_data).await?;
break Ok(None);
}
@@ -151,16 +145,30 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
}
async fn connect_client_to_db(
config: &ProxyConfig,
session: cancellation::Session<'_>,
mut client: PqStream<impl AsyncRead + AsyncWrite + Unpin>,
creds: auth::ClientCredentials,
creds: cplane::ClientCredentials,
session: cancellation::Session,
) -> anyhow::Result<()> {
let db_info = creds.authenticate(config, &mut client).await?;
let (db, version, cancel_closure) = connect_to_db(db_info).await?;
// Authenticate
// TODO use real console
let console = MockConsole {};
let stored_secret = console.get_stored_secret(&creds).await?;
let auth_secret = auth::authenticate(&mut client, stored_secret).await?;
let conn_info = console.get_compute_node(&creds).await?;
let db_auth_info = DatabaseAuthInfo {
conn_info,
creds,
auth_secret,
};
// Connect to db
let (mut db, version, cancel_closure) = connect_to_db(db_auth_info).await?;
let cancel_key_data = session.enable_cancellation(cancel_closure);
// Report success to client
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?
.write_message_noflush(&BeMessage::ParameterStatus(
BeParameterStatusMessage::ServerVersion(&version),
))?
@@ -168,26 +176,33 @@ async fn connect_client_to_db(
.write_message(&BeMessage::ReadyForQuery)
.await?;
// This function will be called for writes to either direction.
fn inc_proxied(cnt: usize) {
// Consider inventing something more sophisticated
// if this ever becomes a bottleneck (cacheline bouncing).
NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
}
let mut db = MetricsStream::new(db, inc_proxied);
let mut client = MetricsStream::new(client.into_inner(), inc_proxied);
let mut client = client.into_inner();
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
Ok(())
}
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
format!(
concat![
"☀️ Welcome to Zenith!\n",
"To proceed with database creation, open the following link:\n\n",
" {redirect_uri}{session_id}\n\n",
"It needs to be done once and we will send you '.pgpass' file,\n",
"which will allow you to access or create ",
"databases without opening your web browser."
],
redirect_uri = redirect_uri,
session_id = session_id,
)
}
/// Connect to a corresponding compute node.
async fn connect_to_db(
db_info: DatabaseInfo,
db_info: DatabaseAuthInfo,
) -> anyhow::Result<(TcpStream, String, CancelClosure)> {
// TODO: establish a secure connection to the DB
let socket_addr = db_info.socket_addr()?;
let socket_addr = db_info.conn_info.socket_addr()?;
let mut socket = TcpStream::connect(socket_addr).await?;
let (client, conn) = tokio_postgres::Config::from(db_info)
@@ -215,14 +230,10 @@ mod tests {
async fn dummy_proxy(
client: impl AsyncRead + AsyncWrite + Unpin,
tls: Option<TlsConfig>,
tls: Option<SslConfig>,
) -> anyhow::Result<()> {
let cancel_map = CancelMap::default();
// TODO: add some infra + tests for credentials
let (mut stream, _creds) = handshake(client, tls, &cancel_map)
.await?
.context("no stream")?;
let (mut stream, _creds) = handshake(client, tls).await?.context("no stream")?;
stream
.write_message_noflush(&Be::AuthenticationOk)?

View File

@@ -1,46 +1,15 @@
use crate::cplane_api::DatabaseInfo;
use anyhow::{anyhow, ensure, Context};
use rustls::{internal::pemfile, NoClientAuth, ProtocolVersion, ServerConfig};
use std::net::SocketAddr;
use std::str::FromStr;
use std::sync::Arc;
pub type TlsConfig = Arc<ServerConfig>;
#[non_exhaustive]
pub enum ClientAuthMethod {
Password,
Link,
/// Use password auth only if username ends with "@zenith"
Mixed,
}
pub enum RouterConfig {
Static { host: String, port: u16 },
Dynamic(ClientAuthMethod),
}
impl FromStr for ClientAuthMethod {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
use ClientAuthMethod::*;
match s {
"password" => Ok(Password),
"link" => Ok(Link),
"mixed" => Ok(Mixed),
_ => Err(anyhow::anyhow!("Invalid option for router")),
}
}
}
pub type SslConfig = Arc<ServerConfig>;
pub struct ProxyConfig {
/// main entrypoint for users to connect to
pub proxy_address: SocketAddr,
/// method of assigning compute nodes
pub router_config: RouterConfig,
/// internally used for status and prometheus metrics
pub http_address: SocketAddr,
@@ -55,10 +24,26 @@ pub struct ProxyConfig {
/// control plane address where we would check auth.
pub auth_endpoint: String,
pub tls_config: Option<TlsConfig>,
pub ssl_config: Option<SslConfig>,
}
pub fn configure_ssl(key_path: &str, cert_path: &str) -> anyhow::Result<TlsConfig> {
pub type ProxyWaiters = crate::waiters::Waiters<Result<DatabaseInfo, String>>;
pub struct ProxyState {
pub conf: ProxyConfig,
pub waiters: ProxyWaiters,
}
impl ProxyState {
pub fn new(conf: ProxyConfig) -> Self {
Self {
conf,
waiters: ProxyWaiters::default(),
}
}
}
pub fn configure_ssl(key_path: &str, cert_path: &str) -> anyhow::Result<SslConfig> {
let key = {
let key_bytes = std::fs::read(key_path).context("SSL key file")?;
let mut keys = pemfile::pkcs8_private_keys(&mut &key_bytes[..])

View File

@@ -1,4 +1,3 @@
use anyhow::Context;
use bytes::BytesMut;
use pin_project_lite::pin_project;
use rustls::ServerConfig;
@@ -51,10 +50,12 @@ impl<S: AsyncRead + Unpin> PqStream<S> {
}
}
pub async fn read_message(&mut self) -> anyhow::Result<FeMessage> {
FeMessage::read_fut(&mut self.stream)
.await?
.context("connection is lost")
pub async fn read_password_message(&mut self) -> anyhow::Result<bytes::Bytes> {
match FeMessage::read_fut(&mut self.stream).await? {
Some(FeMessage::PasswordMessage(msg)) => Ok(msg),
None => anyhow::bail!("connection is lost"),
other => anyhow::bail!("bad message type: {:?}", other),
}
}
}
@@ -163,68 +164,3 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for Stream<S> {
}
}
}
pin_project! {
/// This stream tracks all writes and calls user provided
/// callback when the underlying stream is flushed.
pub struct MetricsStream<S, W> {
#[pin]
stream: S,
write_count: usize,
inc_write_count: W,
}
}
impl<S, W> MetricsStream<S, W> {
pub fn new(stream: S, inc_write_count: W) -> Self {
Self {
stream,
write_count: 0,
inc_write_count,
}
}
}
impl<S: AsyncRead + Unpin, W> AsyncRead for MetricsStream<S, W> {
fn poll_read(
self: Pin<&mut Self>,
context: &mut task::Context<'_>,
buf: &mut ReadBuf<'_>,
) -> task::Poll<io::Result<()>> {
self.project().stream.poll_read(context, buf)
}
}
impl<S: AsyncWrite + Unpin, W: FnMut(usize)> AsyncWrite for MetricsStream<S, W> {
fn poll_write(
self: Pin<&mut Self>,
context: &mut task::Context<'_>,
buf: &[u8],
) -> task::Poll<io::Result<usize>> {
let this = self.project();
this.stream.poll_write(context, buf).map_ok(|cnt| {
// Increment the write count.
*this.write_count += cnt;
cnt
})
}
fn poll_flush(
self: Pin<&mut Self>,
context: &mut task::Context<'_>,
) -> task::Poll<io::Result<()>> {
let this = self.project();
this.stream.poll_flush(context).map_ok(|()| {
// Call the user provided callback and reset the write count.
(this.inc_write_count)(*this.write_count);
*this.write_count = 0;
})
}
fn poll_shutdown(
self: Pin<&mut Self>,
context: &mut task::Context<'_>,
) -> task::Poll<io::Result<()>> {
self.project().stream.poll_shutdown(context)
}
}
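
A minimal sketch of how the `MetricsStream` wrapper above is meant to be used, mirroring the byte counter in proxy.rs (the callback body here is a placeholder):

async fn proxy_pass<A, B>(client: A, db: B) -> anyhow::Result<()>
where
    A: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
    B: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
{
    // The callback fires on each flush with the bytes written since the previous
    // flush; proxy.rs uses it to bump a prometheus counter.
    fn inc_proxied(cnt: usize) {
        println!("proxied {} bytes", cnt);
    }
    let mut client = MetricsStream::new(client, inc_proxied);
    let mut db = MetricsStream::new(db, inc_proxied);
    let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
    Ok(())
}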

View File

@@ -1,12 +1,8 @@
use anyhow::{anyhow, Context};
use hashbrown::HashMap;
use parking_lot::Mutex;
use pin_project_lite::pin_project;
use std::pin::Pin;
use std::task;
use tokio::sync::oneshot;
use anyhow::Context;
use std::collections::HashMap;
use std::sync::{mpsc, Mutex};
pub struct Waiters<T>(pub(self) Mutex<HashMap<String, oneshot::Sender<T>>>);
pub struct Waiters<T>(pub(self) Mutex<HashMap<String, mpsc::Sender<T>>>);
impl<T> Default for Waiters<T> {
fn default() -> Self {
@@ -15,86 +11,48 @@ impl<T> Default for Waiters<T> {
}
impl<T> Waiters<T> {
pub fn register(&self, key: String) -> anyhow::Result<Waiter<T>> {
let (tx, rx) = oneshot::channel();
pub fn register(&self, key: String) -> Waiter<T> {
let (tx, rx) = mpsc::channel();
self.0
.lock()
.try_insert(key.clone(), tx)
.map_err(|_| anyhow!("waiter already registered"))?;
// TODO: use `try_insert` (unstable)
let prev = self.0.lock().unwrap().insert(key.clone(), tx);
assert!(matches!(prev, None)); // assert_matches! is nightly-only
Ok(Waiter {
Waiter {
receiver: rx,
guard: DropKey {
registry: self,
key,
},
})
registry: self,
key,
}
}
pub fn notify(&self, key: &str, value: T) -> anyhow::Result<()>
where
T: Send + Sync,
T: Send + Sync + 'static,
{
let tx = self
.0
.lock()
.unwrap()
.remove(key)
.with_context(|| format!("key {} not found", key))?;
tx.send(value).map_err(|_| anyhow!("waiter channel hangup"))
tx.send(value).context("channel hangup")
}
}
struct DropKey<'a, T> {
key: String,
pub struct Waiter<'a, T> {
receiver: mpsc::Receiver<T>,
registry: &'a Waiters<T>,
key: String,
}
impl<'a, T> Drop for DropKey<'a, T> {
impl<T> Waiter<'_, T> {
pub fn wait(self) -> anyhow::Result<T> {
self.receiver.recv().context("channel hangup")
}
}
impl<T> Drop for Waiter<'_, T> {
fn drop(&mut self) {
self.registry.0.lock().remove(&self.key);
}
}
pin_project! {
pub struct Waiter<'a, T> {
#[pin]
receiver: oneshot::Receiver<T>,
guard: DropKey<'a, T>,
}
}
impl<T> std::future::Future for Waiter<'_, T> {
type Output = anyhow::Result<T>;
fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> task::Poll<Self::Output> {
self.project()
.receiver
.poll(cx)
.map_err(|_| anyhow!("channel hangup"))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
#[tokio::test]
async fn test_waiter() -> anyhow::Result<()> {
let waiters = Arc::new(Waiters::default());
let key = "Key";
let waiter = waiters.register(key.to_owned())?;
let waiters = Arc::clone(&waiters);
let notifier = tokio::spawn(async move {
waiters.notify(key, Default::default())?;
Ok(())
});
let () = waiter.await?;
notifier.await?
self.registry.0.lock().unwrap().remove(&self.key);
}
}

View File

@@ -2,7 +2,7 @@
name = "zenith"
version = "0.1.0"
description = ""
authors = []
authors = ["Dmitry Rodionov <dmitry@zenith.tech>"]
[tool.poetry.dependencies]
python = "^3.7"

View File

@@ -1,24 +1,27 @@
#!/bin/bash
# this is a shortcut script to avoid duplication in CI
set -eux -o pipefail
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
echo "Uploading perf report to zenith pg"
# ingest per-test result data into the zenith-backed postgres running in staging, to build grafana reports on that data
DATABASE_URL="$PERF_TEST_RESULT_CONNSTR" poetry run python "$SCRIPT_DIR"/ingest_perf_test_result.py --ingest "$REPORT_FROM"
git clone https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git
cd zenith-perf-data
mkdir -p reports/
mkdir -p data/$REPORT_TO
# Activate poetry's venv. Needed because git-upload does not run in the project dir (it uses a temp dir to store the repository),
# so poetry would otherwise fail to find pyproject.toml in the temp dir created by git-upload
# shellcheck source=/dev/null
. "$(poetry env info --path)"/bin/activate
cp $REPORT_FROM/* data/$REPORT_TO
echo "Uploading perf result to zenith-perf-data"
scripts/git-upload \
--repo=https://"$VIP_VAP_ACCESS_TOKEN"@github.com/zenithdb/zenith-perf-data.git \
--message="add performance test result for $GITHUB_SHA zenith revision" \
--branch=master \
copy "$REPORT_FROM" "data/$REPORT_TO" `# COPY FROM TO_RELATIVE`\
--merge \
--run-cmd "python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html"
echo "Generating report"
poetry run python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html
echo "Uploading perf result"
git add data reports
git \
-c "user.name=vipvap" \
-c "user.email=vipvap@zenith.tech" \
commit \
--author="vipvap <vipvap@zenith.tech>" \
-m "add performance test result for $GITHUB_SHA zenith revision"
git push https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git master
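
For reference, a minimal sketch (Python, placeholder values) of the environment this script expects; the variable names are taken from the script itself, and which of them are actually required depends on which variant of the script is in use:

import os
import subprocess

# Variable names come from the script above; all values here are placeholders.
env = dict(
    os.environ,
    REPORT_FROM="/path/to/perf-report-local",       # directory with result json files
    REPORT_TO="local",                              # subdirectory under data/ in the perf repo
    VIP_VAP_ACCESS_TOKEN="<github token>",          # used to push to zenith-perf-data
    GITHUB_SHA="<zenith revision>",                 # recorded in the commit message
    PERF_TEST_RESULT_CONNSTR="<postgres connstr>",  # only needed by the db-ingest variant
)
subprocess.run(["scripts/generate_and_push_perf_report.sh"], env=env, check=True)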

View File

@@ -1,9 +1,7 @@
#!/usr/bin/env python3
from contextlib import contextmanager
import shlex
from tempfile import TemporaryDirectory
from distutils.dir_util import copy_tree
from pathlib import Path
import argparse
@@ -11,8 +9,6 @@ import os
import shutil
import subprocess
import sys
import textwrap
from typing import Optional
def absolute_path(path):
@@ -42,21 +38,13 @@ def run(cmd, *args, **kwargs):
class GitRepo:
def __init__(self, url, branch: Optional[str] = None):
def __init__(self, url):
self.url = url
self.cwd = TemporaryDirectory()
self.branch = branch
args = [
'git',
'clone',
'--single-branch',
]
if self.branch:
args.extend(['--branch', self.branch])
subprocess.check_call([
*args,
'git',
'clone',
str(url),
self.cwd.name,
])
@@ -112,44 +100,23 @@ def do_copy(args):
raise FileExistsError(f"File exists: '{dst}'")
if src.is_dir():
if not args.merge:
shutil.rmtree(dst, ignore_errors=True)
# distutils is deprecated, but this is a temporary workaround until the python version bump:
# here we need dirs_exist_ok=True from shutil.copytree, which is only available in python 3.8+
copy_tree(str(src), str(dst))
shutil.rmtree(dst, ignore_errors=True)
shutil.copytree(src, dst)
else:
shutil.copy(src, dst)
if args.run_cmd:
run(shlex.split(args.run_cmd))
def main():
parser = argparse.ArgumentParser(description='Git upload tool')
parser.add_argument('--repo', type=str, metavar='URL', required=True, help='git repo url')
parser.add_argument('--message', type=str, metavar='TEXT', help='commit message')
parser.add_argument('--branch', type=str, metavar='TEXT', help='target git repo branch')
commands = parser.add_subparsers(title='commands', dest='subparser_name')
p_copy = commands.add_parser(
'copy',
help='copy file into the repo',
formatter_class=argparse.RawTextHelpFormatter,
)
p_copy = commands.add_parser('copy', help='copy file into the repo')
p_copy.add_argument('src', type=absolute_path, help='source path')
p_copy.add_argument('dst', type=relative_path, help='relative dest path')
p_copy.add_argument('--forbid-overwrite', action='store_true', help='do not allow overwrites')
p_copy.add_argument(
'--merge',
action='store_true',
help='when copying a directory do not delete existing data, but add new files')
p_copy.add_argument('--run-cmd',
help=textwrap.dedent('''\
run arbitrary cmd on top of copied files,
example usage is static content generation
based on current repository state\
'''))
args = parser.parse_args()
@@ -160,7 +127,7 @@ def main():
action = commands.get(args.subparser_name)
if action:
message = args.message or 'update'
GitRepo(args.repo, args.branch).update(message, lambda: action(args))
GitRepo(args.repo).update(message, lambda: action(args))
else:
parser.print_usage()
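
A rough sketch of how the fuller variant of scripts/git-upload (the one generate_and_push_perf_report.sh calls above) might be driven from Python; the flags mirror the argparse definitions shown, while the repository URL and paths are placeholders:

import subprocess

# Flag names mirror the argparse definitions above; URL and paths are illustrative only.
subprocess.run(
    [
        "scripts/git-upload",
        "--repo=https://<token>@github.com/example/perf-data.git",
        "--message=add performance test result",
        "--branch=master",
        "copy", "perf-report-local", "data/local",  # copy <src> <dst-relative-to-repo>
        "--merge",                                  # keep existing files in the target dir
        "--run-cmd",
        "python scripts/generate_perf_report_page.py --input-dir data/local --out reports/local.html",
    ],
    check=True,
)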

View File

@@ -1,136 +0,0 @@
#!/usr/bin/env python3
import argparse
from contextlib import contextmanager
import json
import os
import psycopg2
import psycopg2.extras
from pathlib import Path
from datetime import datetime
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS perf_test_results (
id SERIAL PRIMARY KEY,
suit TEXT,
revision CHAR(40),
platform TEXT,
metric_name TEXT,
metric_value NUMERIC,
metric_unit VARCHAR(10),
metric_report_type TEXT,
recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
)
"""
def err(msg):
print(f'error: {msg}')
exit(1)
@contextmanager
def get_connection_cursor():
connstr = os.getenv('DATABASE_URL')
if not connstr:
err('DATABASE_URL environment variable is not set')
with psycopg2.connect(connstr) as conn:
with conn.cursor() as cur:
yield cur
def create_table(cur):
cur.execute(CREATE_TABLE)
def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) -> int:
run_data = json.loads(data_file.read_text())
revision = run_data['revision']
platform = run_data['platform']
run_result = run_data['result']
args_list = []
for suit_result in run_result:
suit = suit_result['suit']
total_duration = suit_result['total_duration']
suit_result['data'].append({
'name': 'total_duration',
'value': total_duration,
'unit': 's',
'report': 'lower_is_better',
})
for metric in suit_result['data']:
values = {
'suit': suit,
'revision': revision,
'platform': platform,
'metric_name': metric['name'],
'metric_value': metric['value'],
'metric_unit': metric['unit'],
'metric_report_type': metric['report'],
'recorded_at_timestamp': datetime.utcfromtimestamp(recorded_at_timestamp),
}
args_list.append(values)
psycopg2.extras.execute_values(
cursor,
"""
INSERT INTO perf_test_results (
suit,
revision,
platform,
metric_name,
metric_value,
metric_unit,
metric_report_type,
recorded_at_timestamp
) VALUES %s
""",
args_list,
template="""(
%(suit)s,
%(revision)s,
%(platform)s,
%(metric_name)s,
%(metric_value)s,
%(metric_unit)s,
%(metric_report_type)s,
%(recorded_at_timestamp)s
)""",
)
return len(args_list)
def main():
parser = argparse.ArgumentParser(description='Perf test result uploader. \
Database connection string should be provided via DATABASE_URL environment variable', )
parser.add_argument(
'--ingest',
type=Path,
help='Path to perf test result file, or directory with perf test result files')
parser.add_argument('--initdb', action='store_true', help='Initialize database')
args = parser.parse_args()
with get_connection_cursor() as cur:
if args.initdb:
create_table(cur)
if args.ingest and not args.ingest.exists():
err(f'ingest path {args.ingest} does not exist')
if args.ingest:
if args.ingest.is_dir():
for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split('_')[0])):
recorded_at_timestamp = int(item.name.split('_')[0])
ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp)
print(f'Ingested {ingested} metric values from {item}')
else:
recorded_at_timestamp = int(args.ingest.name.split('_')[0])
ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp)
print(f'Ingested {ingested} metric values from {args.ingest}')
if __name__ == '__main__':
main()
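
A sketch of the input shape ingest_perf_test_result appears to expect, reconstructed from the parsing code above; the field names come from the script, the values are made up, and the unix-timestamp prefix in the file name is implied by the name.split('_')[0] calls:

# Illustrative only: field names are taken from the script above, values are not.
# The file itself would be named like "1644400000_results.json" (unix timestamp prefix).
example_run = {
    "revision": "0123456789abcdef0123456789abcdef01234567",  # 40-char git sha
    "platform": "zenith-local-ci",
    "result": [
        {
            "suit": "pgbench",
            "total_duration": 12.3,  # seconds; re-added as a metric by the script
            "data": [
                {
                    "name": "init_duration",
                    "value": 1.5,
                    "unit": "s",
                    "report": "lower_is_better",
                },
            ],
        },
    ],
}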

View File

@@ -89,7 +89,7 @@ def test_foobar(zenith_env_builder: ZenithEnvBuilder):
# Now create the environment. This initializes the repository, and starts
# up the page server and the safekeepers
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
# Run the test
...

View File

@@ -1,49 +1,45 @@
from contextlib import closing
from typing import Iterator
from uuid import UUID, uuid4
from uuid import uuid4
import psycopg2
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.zenith_fixtures import ZenithEnvBuilder
import pytest
pytest_plugins = ("fixtures.zenith_fixtures")
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
ps = env.pageserver
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant.hex)
tenant_http_client = env.pageserver.http_client(tenant_token)
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
invalid_tenant_token = env.auth_keys.generate_tenant_token(uuid4().hex)
invalid_tenant_http_client = env.pageserver.http_client(invalid_tenant_token)
management_token = env.auth_keys.generate_management_token()
management_http_client = env.pageserver.http_client(management_token)
# this does not invoke the auth check; it only decodes the jwt and checks its validity
# check both tokens
ps.safe_psql("set FOO", password=tenant_token)
ps.safe_psql("set FOO", password=management_token)
ps.safe_psql("status", password=tenant_token)
ps.safe_psql("status", password=management_token)
# tenant can create branches
tenant_http_client.branch_create(env.initial_tenant, 'new1', 'main')
ps.safe_psql(f"branch_create {env.initial_tenant} new1 main", password=tenant_token)
# console can create branches for tenant
management_http_client.branch_create(env.initial_tenant, 'new2', 'main')
ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=management_token)
# fail to create branch using token with different tenant_id
with pytest.raises(ZenithPageserverApiException,
match='Forbidden: Tenant id mismatch. Permission denied'):
invalid_tenant_http_client.branch_create(env.initial_tenant, "new3", "main")
# fail to create branch using token with different tenant_id
with pytest.raises(psycopg2.DatabaseError, match='Tenant id mismatch. Permission denied'):
ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=invalid_tenant_token)
# create tenant using management token
management_http_client.tenant_create(uuid4())
ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token)
# fail to create tenant using tenant token
with pytest.raises(
ZenithPageserverApiException,
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
):
tenant_http_client.tenant_create(uuid4())
psycopg2.DatabaseError,
match='Attempt to access management api with tenant scope. Permission denied'):
ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token)
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
@@ -51,10 +47,10 @@ def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_w
zenith_env_builder.pageserver_auth_enabled = True
if with_wal_acceptors:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
branch = f"test_compute_auth_to_pageserver{with_wal_acceptors}"
env.zenith_cli.create_branch(branch, "main")
env.zenith_cli(["branch", branch, "main"])
pg = env.postgres.create_start(branch)

View File

@@ -1,154 +0,0 @@
from contextlib import closing, contextmanager
import psycopg2.extras
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.zenith_fixtures import Postgres
import threading
pytest_plugins = ("fixtures.zenith_fixtures")
@contextmanager
def pg_cur(pg):
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
yield cur
# Periodically check that all backpressure lags are below the configured threshold,
# and fail an assertion if they are not.
# If the check query fails, stop the thread. The main thread should notice that and stop the test.
def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5):
log.info("checks started")
with pg_cur(pg) as cur:
cur.execute("CREATE EXTENSION zenith") # TODO move it to zenith_fixtures?
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
res = cur.fetchone()
max_replication_write_lag_bytes = res[0]
log.info(f"max_replication_write_lag: {max_replication_write_lag_bytes} bytes")
cur.execute("select pg_size_bytes(current_setting('max_replication_flush_lag'))")
res = cur.fetchone()
max_replication_flush_lag_bytes = res[0]
log.info(f"max_replication_flush_lag: {max_replication_flush_lag_bytes} bytes")
cur.execute("select pg_size_bytes(current_setting('max_replication_apply_lag'))")
res = cur.fetchone()
max_replication_apply_lag_bytes = res[0]
log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes")
with pg_cur(pg) as cur:
while not stop_event.is_set():
try:
cur.execute('''
select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag,
pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn) as disk_consistent_lsn_lag,
pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn) as remote_consistent_lsn_lag,
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn)),
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn)),
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn))
from backpressure_lsns();
''')
res = cur.fetchone()
received_lsn_lag = res[0]
disk_consistent_lsn_lag = res[1]
remote_consistent_lsn_lag = res[2]
log.info(f"received_lsn_lag = {received_lsn_lag} ({res[3]}), "
f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), "
f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})")
# Since feedback from pageserver is not immediate, we should allow some lag overflow
lag_overflow = 5 * 1024 * 1024 # 5MB
if max_replication_write_lag_bytes > 0:
assert received_lsn_lag < max_replication_write_lag_bytes + lag_overflow
if max_replication_flush_lag_bytes > 0:
assert disk_consistent_lsn_lag < max_replication_flush_lag_bytes + lag_overflow
if max_replication_apply_lag_bytes > 0:
assert remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow
time.sleep(polling_interval)
except Exception as e:
log.info(f"backpressure check query failed: {e}")
stop_event.set()
log.info('check thread stopped')
# This test illustrates how to tune backpressure to control the lag
# between the WAL flushed on the compute node and the WAL digested by the pageserver.
#
# To test it, throttle walreceiver ingest using a failpoint and run a heavy write load.
# If backpressure is disabled or not tuned properly, the query will time out, because the walreceiver cannot keep up.
# If backpressure is enabled and tuned properly, insertion will be throttled, but the query will not time out.
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init()
# Create a branch for us
env.zenith_cli.create_branch("test_backpressure", "main")
pg = env.postgres.create_start('test_backpressure',
config_lines=['max_replication_write_lag=30MB'])
log.info("postgres is running on 'test_backpressure' branch")
# setup check thread
check_stop_event = threading.Event()
check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
check_thread.start()
# Configure failpoint to slow down walreceiver ingest
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")
# FIXME
# Wait for the check thread to start
#
# If the load starts too soon, the check thread cannot authenticate, because it
# cannot connect to the database while it waits for the LSN it needs to be replayed.
time.sleep(2)
with pg_cur(pg) as cur:
# Create and initialize test table
cur.execute("CREATE TABLE foo(x bigint)")
inserts_to_do = 2000000
rows_inserted = 0
while check_thread.is_alive() and rows_inserted < inserts_to_do:
try:
cur.execute("INSERT INTO foo select from generate_series(1, 100000)")
rows_inserted += 100000
except Exception as e:
if check_thread.is_alive():
log.info('stopping check thread')
check_stop_event.set()
check_thread.join()
assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
else:
assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."
log.info(f"inserted {rows_inserted} rows")
if check_thread.is_alive():
log.info('stopping check thread')
check_stop_event.set()
check_thread.join()
log.info('check thread stopped')
else:
assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
#TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings
#TODO test_backpressure_remote_consistent_lsn_lag
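
Stripped of the fixtures, the checking pattern this test relies on boils down to polling an invariant from a worker thread and signalling failure through a threading.Event; a condensed, self-contained sketch, where check() is only a stand-in for the real backpressure query:

import threading
import time

def poll_invariant(check, stop_event: threading.Event, polling_interval=5):
    # Run check() until it raises or stop_event is set; on failure, set the
    # event so the main thread can notice and stop the workload.
    while not stop_event.is_set():
        try:
            check()  # e.g. assert received_lsn_lag < max_replication_write_lag + overflow
            time.sleep(polling_interval)
        except Exception as e:
            print(f"invariant check failed: {e}")
            stop_event.set()

stop_event = threading.Event()
checker = threading.Thread(target=poll_invariant, args=(lambda: None, stop_event))
checker.start()
# ... run the write workload here, aborting it if checker is no longer alive ...
stop_event.set()
checker.join()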

View File

@@ -7,6 +7,8 @@ from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Create a couple of branches off the main branch, at a historical point in time.
@@ -19,10 +21,10 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
#
# See https://github.com/zenithdb/zenith/issues/1068
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch("test_branch_behind", "main")
env.zenith_cli(["branch", "test_branch_behind", "main"])
pgmain = env.postgres.create_start('test_branch_behind')
log.info("postgres is running on 'test_branch_behind' branch")
@@ -60,7 +62,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 200100 rows: {lsn_b}')
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch("test_branch_behind_hundred", "test_branch_behind@" + lsn_a)
env.zenith_cli(["branch", "test_branch_behind_hundred", "test_branch_behind@" + lsn_a])
# Insert many more rows. This generates enough WAL to fill a few segments.
main_cur.execute('''
@@ -75,7 +77,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 400100 rows: {lsn_c}')
# Branch at the point where only 200100 rows were inserted
env.zenith_cli.create_branch("test_branch_behind_more", "test_branch_behind@" + lsn_b)
env.zenith_cli(["branch", "test_branch_behind_more", "test_branch_behind@" + lsn_b])
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
pg_more = env.postgres.create_start("test_branch_behind_more")
@@ -99,7 +101,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# Check bad lsn's for branching
# branch at segment boundary
env.zenith_cli.create_branch("test_branch_segment_boundary", "test_branch_behind@0/3000000")
env.zenith_cli(["branch", "test_branch_segment_boundary", "test_branch_behind@0/3000000"])
pg = env.postgres.create_start("test_branch_segment_boundary")
cur = pg.connect().cursor()
cur.execute('SELECT 1')
@@ -107,23 +109,23 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# branch at pre-initdb lsn
with pytest.raises(Exception, match="invalid branch start lsn"):
env.zenith_cli.create_branch("test_branch_preinitdb", "main@0/42")
env.zenith_cli(["branch", "test_branch_preinitdb", "main@0/42"])
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.zenith_cli.create_branch("test_branch_preinitdb", "test_branch_behind@0/42")
env.zenith_cli(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
# check that we cannot create branch based on garbage collected data
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
# call gc to advance latest_gc_cutoff_lsn
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
with pytest.raises(Exception, match="invalid branch start lsn"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.zenith_cli.create_branch("test_branch_create_fail", f"test_branch_behind@{gced_lsn}")
env.zenith_cli(["branch", "test_branch_create_fail", f"test_branch_behind@{gced_lsn}"])
# check that after gc everything is still there
hundred_cur.execute('SELECT count(*) FROM foo')

View File

@@ -6,13 +6,16 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test compute node start after clog truncation
#
def test_clog_truncate(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_clog_truncate", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_clog_truncate", "empty"])
# set aggressive autovacuum to make sure that truncation will happen
config = [
@@ -62,8 +65,8 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
env.zenith_cli.create_branch("test_clog_truncate_new",
"test_clog_truncate@" + lsn_after_truncation)
env.zenith_cli(
["branch", "test_clog_truncate_new", "test_clog_truncate@" + lsn_after_truncation])
pg2 = env.postgres.create_start('test_clog_truncate_new')
log.info('postgres is running on test_clog_truncate_new branch')

View File

@@ -3,13 +3,16 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test starting Postgres with custom options
#
def test_config(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_config", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_config", "empty"])
# change config
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])

View File

@@ -5,13 +5,15 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test CREATE DATABASE when there have been relmapper changes
#
def test_createdb(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_createdb", "empty")
env.zenith_cli(["branch", "test_createdb", "empty"])
pg = env.postgres.create_start('test_createdb')
log.info("postgres is running on 'test_createdb' branch")
@@ -27,7 +29,7 @@ def test_createdb(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch("test_createdb2", "test_createdb@" + lsn)
env.zenith_cli(["branch", "test_createdb2", "test_createdb@" + lsn])
pg2 = env.postgres.create_start('test_createdb2')
@@ -41,7 +43,7 @@ def test_createdb(zenith_simple_env: ZenithEnv):
#
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch("test_dropdb", "empty")
env.zenith_cli(["branch", "test_dropdb", "empty"])
pg = env.postgres.create_start('test_dropdb')
log.info("postgres is running on 'test_dropdb' branch")
@@ -66,10 +68,10 @@ def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
lsn_after_drop = cur.fetchone()[0]
# Create two branches before and after database drop.
env.zenith_cli.create_branch("test_before_dropdb", "test_dropdb@" + lsn_before_drop)
env.zenith_cli(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop])
pg_before = env.postgres.create_start('test_before_dropdb')
env.zenith_cli.create_branch("test_after_dropdb", "test_dropdb@" + lsn_after_drop)
env.zenith_cli(["branch", "test_after_dropdb", "test_dropdb@" + lsn_after_drop])
pg_after = env.postgres.create_start('test_after_dropdb')
# Test that database exists on the branch before drop

View File

@@ -3,13 +3,15 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test CREATE USER to check shared catalog restore
#
def test_createuser(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_createuser", "empty")
env.zenith_cli(["branch", "test_createuser", "empty"])
pg = env.postgres.create_start('test_createuser')
log.info("postgres is running on 'test_createuser' branch")
@@ -25,7 +27,7 @@ def test_createuser(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch("test_createuser2", "test_createuser@" + lsn)
env.zenith_cli(["branch", "test_createuser2", "test_createuser@" + lsn])
pg2 = env.postgres.create_start('test_createuser2')

View File

@@ -7,6 +7,8 @@ import random
from fixtures.zenith_fixtures import ZenithEnv, Postgres, Safekeeper
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
# Test configuration
#
# Create a table with {num_rows} rows, and perform {updates_to_perform} random
@@ -34,7 +36,7 @@ async def gc(env: ZenithEnv, timeline: str):
psconn = await env.pageserver.connect_async()
while updates_performed < updates_to_perform:
await psconn.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
await psconn.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# At the same time, run UPDATEs and GC
@@ -55,7 +57,9 @@ async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
#
def test_gc_aggressive(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_gc_aggressive", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_gc_aggressive", "empty"])
pg = env.postgres.create_start('test_gc_aggressive')
log.info('postgres is running on test_gc_aggressive branch')
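
The gc and update coroutines above are run concurrently; reduced to its core, the asyncio pattern looks roughly like this (both coroutines here are placeholders for the real loops):

import asyncio

async def run_updates():
    # stand-in for the loop issuing random UPDATEs via asyncpg
    await asyncio.sleep(0.1)

async def run_gc():
    # stand-in for the loop calling "do_gc <tenant> <timeline> 0" on the pageserver
    await asyncio.sleep(0.1)

async def update_and_gc():
    # drive both loops concurrently until the update workload is done
    await asyncio.gather(run_updates(), run_gc())

asyncio.run(update_and_gc())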

View File

@@ -1,6 +1,8 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test multixact state after branching
@@ -10,7 +12,8 @@ from fixtures.log_helper import log
#
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch("test_multixact", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_multixact", "empty"])
pg = env.postgres.create_start('test_multixact')
log.info("postgres is running on 'test_multixact' branch")
@@ -60,7 +63,7 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.zenith_cli.create_branch("test_multixact_new", "test_multixact@" + lsn)
env.zenith_cli(["branch", "test_multixact_new", "test_multixact@" + lsn])
pg_new = env.postgres.create_start('test_multixact_new')
log.info("postgres is running on 'test_multixact_new' branch")

View File

@@ -5,13 +5,15 @@ import time
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
# One safekeeper is enough for this test.
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
pg = env.postgres.create_start('main')

View File

@@ -3,6 +3,8 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test where Postgres generates a lot of WAL, and it's garbage collected away, but
@@ -16,7 +18,8 @@ from fixtures.log_helper import log
#
def test_old_request_lsn(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_old_request_lsn", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_old_request_lsn", "empty"])
pg = env.postgres.create_start('test_old_request_lsn')
log.info('postgres is running on test_old_request_lsn branch')
@@ -54,7 +57,7 @@ def test_old_request_lsn(zenith_simple_env: ZenithEnv):
# Make a lot of updates on a single row, generating a lot of WAL. Trigger
# garbage collections so that the page server will remove old page versions.
for i in range(10):
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
for j in range(100):
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;')

View File

@@ -1,22 +1,95 @@
import json
from uuid import uuid4, UUID
import pytest
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient, zenith_binpath
import psycopg2
import requests
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from typing import cast
pytest_plugins = ("fixtures.zenith_fixtures")
# test that we cannot override node id
def test_pageserver_init_node_id(zenith_env_builder: ZenithEnvBuilder):
def test_status_psql(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
assert env.pageserver.safe_psql('status') == [
('hello world', ),
]
def test_branch_list_psql(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_branch_list_main", "empty"])
conn = env.pageserver.connect()
cur = conn.cursor()
cur.execute(f'branch_list {env.initial_tenant}')
branches = json.loads(cur.fetchone()[0])
# Filter out branches created by other tests
branches = [x for x in branches if x['name'].startswith('test_branch_list')]
assert len(branches) == 1
assert branches[0]['name'] == 'test_branch_list_main'
assert 'timeline_id' in branches[0]
assert 'latest_valid_lsn' in branches[0]
assert 'ancestor_id' in branches[0]
assert 'ancestor_lsn' in branches[0]
# Create another branch, and start Postgres on it
env.zenith_cli(['branch', 'test_branch_list_experimental', 'test_branch_list_main'])
env.zenith_cli(['pg', 'create', 'test_branch_list_experimental'])
cur.execute(f'branch_list {env.initial_tenant}')
new_branches = json.loads(cur.fetchone()[0])
# Filter out branches created by other tests
new_branches = [x for x in new_branches if x['name'].startswith('test_branch_list')]
assert len(new_branches) == 2
new_branches.sort(key=lambda k: k['name'])
assert new_branches[0]['name'] == 'test_branch_list_experimental'
assert new_branches[0]['timeline_id'] != branches[0]['timeline_id']
# TODO: do the LSNs have to match here?
assert new_branches[1] == branches[0]
conn.close()
def test_tenant_list_psql(zenith_env_builder: ZenithEnvBuilder):
# don't use zenith_simple_env, because there might be other tenants there,
# left over from other tests.
env = zenith_env_builder.init()
with pytest.raises(
Exception,
match="node id can only be set during pageserver init and cannot be overridden"):
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
res = env.zenith_cli(["tenant", "list"])
res.check_returncode()
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants == [env.initial_tenant]
conn = env.pageserver.connect()
cur = conn.cursor()
# check same tenant cannot be created twice
with pytest.raises(psycopg2.DatabaseError,
match=f'repo for {env.initial_tenant} already exists'):
cur.execute(f'tenant_create {env.initial_tenant}')
# create one more tenant
tenant1 = uuid4().hex
cur.execute(f'tenant_create {tenant1}')
cur.execute('tenant_list')
# compare tenants list
new_tenants = sorted(map(lambda t: cast(str, t['id']), json.loads(cur.fetchone()[0])))
assert sorted([env.initial_tenant, tenant1]) == new_tenants
def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
def check_client(client: ZenithPageserverHttpClient, initial_tenant: str):
client.check_status()
# check initial tenant is there
assert initial_tenant.hex in {t['id'] for t in client.tenant_list()}
assert initial_tenant in {t['id'] for t in client.tenant_list()}
# create new tenant and check it is also there
tenant_id = uuid4()
@@ -48,7 +121,7 @@ def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
management_token = env.auth_keys.generate_management_token()

View File

@@ -7,6 +7,8 @@ from multiprocessing import Process, Value
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
# Test safekeeper sync and pageserver catch up
# while initial compute node is down and pageserver is lagging behind safekeepers.
@@ -14,9 +16,9 @@ from fixtures.log_helper import log
# and new compute node contains all data.
def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_pageserver_catchup_while_compute_down", "main")
env.zenith_cli(["branch", "test_pageserver_catchup_while_compute_down", "main"])
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down')
pg_conn = pg.connect()

View File

@@ -7,15 +7,17 @@ from multiprocessing import Process, Value
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
# One safekeeper is enough for this test.
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_pageserver_restart", "main")
env.zenith_cli(["branch", "test_pageserver_restart", "main"])
pg = env.postgres.create_start('test_pageserver_restart')
pg_conn = pg.connect()

View File

@@ -5,6 +5,8 @@ import subprocess
from fixtures.zenith_fixtures import ZenithEnv, Postgres
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
async def repeat_bytes(buf, repetitions: int):
for i in range(repetitions):
@@ -37,7 +39,9 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
env = zenith_simple_env
env.zenith_cli.create_branch("test_parallel_copy", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_parallel_copy", "empty"])
pg = env.postgres.create_start('test_parallel_copy')
log.info("postgres is running on 'test_parallel_copy' branch")

View File

@@ -1,10 +1,14 @@
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin):
env = zenith_simple_env
env.zenith_cli.create_branch("test_pgbench", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_pgbench", "empty"])
pg = env.postgres.create_start('test_pgbench')
log.info("postgres is running on 'test_pgbench' branch")

View File

@@ -2,6 +2,8 @@ import pytest
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Create read-only compute nodes, anchored at historical points in time.
@@ -11,7 +13,7 @@ from fixtures.zenith_fixtures import ZenithEnv
#
def test_readonly_node(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_readonly_node", "empty")
env.zenith_cli(["branch", "test_readonly_node", "empty"])
pgmain = env.postgres.create_start('test_readonly_node')
log.info("postgres is running on 'test_readonly_node' branch")
@@ -86,5 +88,4 @@ def test_readonly_node(zenith_simple_env: ZenithEnv):
# Create node at pre-initdb lsn
with pytest.raises(Exception, match="invalid basebackup lsn"):
# compute node startup with invalid LSN should fail
env.zenith_cli.pg_start("test_readonly_node_preinitdb",
timeline_spec="test_readonly_node@0/42")
env.zenith_cli(["pg", "start", "test_readonly_node_preinitdb", "test_readonly_node@0/42"])

View File

@@ -9,6 +9,8 @@ from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
import pytest
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Tests that a piece of data is backed up and restored correctly:
@@ -42,7 +44,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
data_secret = 'very secret secret'
##### First start, insert secret data and upload it to the remote storage
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
pg = env.postgres.create_start()
tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]

View File

@@ -4,6 +4,8 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test restarting and recreating a postgres instance
@@ -13,9 +15,9 @@ def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_wal_acceptor
zenith_env_builder.pageserver_auth_enabled = True
if with_wal_acceptors:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_restart_compute", "main")
env.zenith_cli(["branch", "test_restart_compute", "main"])
pg = env.postgres.create_start('test_restart_compute')
log.info("postgres is running on 'test_restart_compute' branch")

View File

@@ -5,6 +5,8 @@ from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test Garbage Collection of old layer files
@@ -14,7 +16,7 @@ from fixtures.log_helper import log
#
def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_layerfiles_gc", "empty")
env.zenith_cli(["branch", "test_layerfiles_gc", "empty"])
pg = env.postgres.create_start('test_layerfiles_gc')
with closing(pg.connect()) as conn:
@@ -48,7 +50,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
cur.execute("DELETE FROM foo")
log.info("Running GC before test")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
# remember the number of files
@@ -61,7 +63,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
# removing the old image and delta layer.
log.info("Inserting one row and running GC")
cur.execute("INSERT INTO foo VALUES (1)")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
@@ -75,7 +77,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
cur.execute("INSERT INTO foo VALUES (2)")
cur.execute("INSERT INTO foo VALUES (3)")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
@@ -87,7 +89,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
cur.execute("INSERT INTO foo VALUES (2)")
cur.execute("INSERT INTO foo VALUES (3)")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
@@ -96,7 +98,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
# Run GC again, with no changes in the database. Should not remove anything.
log.info("Run GC again, with nothing to do")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)
assert row['layer_relfiles_total'] == layer_relfiles_remain
@@ -109,7 +111,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
log.info("Drop table and run GC again")
cur.execute("DROP TABLE foo")
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print_gc_result(row)

View File

@@ -1,6 +1,8 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
# Test subtransactions
#
@@ -10,7 +12,8 @@ from fixtures.log_helper import log
# CLOG.
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch("test_subxacts", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_subxacts", "empty"])
pg = env.postgres.create_start('test_subxacts')
log.info("postgres is running on 'test_subxacts' branch")

View File

@@ -108,8 +108,8 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
log.info('load thread stopped')
def assert_local(pageserver_http_client: ZenithPageserverHttpClient, tenant: UUID, timeline: str):
timeline_detail = pageserver_http_client.timeline_detail(tenant, UUID(timeline))
def assert_local(pageserver_http_client: ZenithPageserverHttpClient, tenant: str, timeline: str):
timeline_detail = pageserver_http_client.timeline_detail(UUID(tenant), UUID(timeline))
assert timeline_detail.get('type') == "Local", timeline_detail
return timeline_detail
@@ -122,15 +122,15 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
zenith_env_builder.num_safekeepers = 1
zenith_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
tenant = env.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
tenant = env.create_tenant("74ee8b079a0e437eb0afea7d26a07209")
log.info("tenant to relocate %s", tenant)
env.zenith_cli.create_branch("test_tenant_relocation", "main", tenant_id=tenant)
env.zenith_cli(["branch", "test_tenant_relocation", "main", f"--tenantid={tenant}"])
tenant_pg = env.postgres.create_start(
"test_tenant_relocation",
@@ -167,11 +167,11 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
# run checkpoint manually to be sure that data landed in remote storage
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor() as pscur:
pscur.execute(f"do_gc {tenant.hex} {timeline}")
pscur.execute(f"do_gc {tenant} {timeline}")
# ensure upload is completed
pageserver_http_client = env.pageserver.http_client()
timeline_detail = pageserver_http_client.timeline_detail(tenant, UUID(timeline))
timeline_detail = pageserver_http_client.timeline_detail(UUID(tenant), UUID(timeline))
assert timeline_detail['disk_consistent_lsn'] == timeline_detail['timeline_state']['Ready']
log.info("inititalizing new pageserver")
@@ -194,7 +194,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
new_pageserver_http_port):
# call to attach timeline to new pageserver
new_pageserver_http_client.timeline_attach(tenant, UUID(timeline))
new_pageserver_http_client.timeline_attach(UUID(tenant), UUID(timeline))
# FIXME cannot handle duplicate download requests, subject to fix in https://github.com/zenithdb/zenith/issues/997
time.sleep(5)
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one, because there have been no new writes since the checkpoint
@@ -241,7 +241,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
# detach the tenant from the old pageserver before we check
# that all the data is there, to be sure that the old pageserver
# is no longer involved; if it is, we will see errors
pageserver_http_client.timeline_detach(tenant, UUID(timeline))
pageserver_http_client.timeline_detach(UUID(tenant), UUID(timeline))
with pg_cur(tenant_pg) as cur:
# check that data is still there

View File

@@ -10,17 +10,23 @@ def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_wal_acce
if with_wal_acceptors:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
"""Tests tenants with and without wal acceptors"""
tenant_1 = env.create_tenant()
tenant_2 = env.create_tenant()
env.zenith_cli.create_branch(f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
"main",
tenant_id=tenant_1)
env.zenith_cli.create_branch(f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
"main",
tenant_id=tenant_2)
env.zenith_cli([
"branch",
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
"main",
f"--tenantid={tenant_1}"
])
env.zenith_cli([
"branch",
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
"main",
f"--tenantid={tenant_2}"
])
pg_tenant1 = env.postgres.create_start(
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",

View File

@@ -1,19 +1,17 @@
from contextlib import closing
from uuid import UUID
import psycopg2.extras
import psycopg2.errors
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
import time
def test_timeline_size(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch("test_timeline_size", "empty")
env.zenith_cli(["branch", "test_timeline_size", "empty"])
client = env.pageserver.http_client()
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
pgmain = env.postgres.create_start("test_timeline_size")
@@ -31,102 +29,9 @@ def test_timeline_size(zenith_simple_env: ZenithEnv):
FROM generate_series(1, 10) g
""")
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
cur.execute("TRUNCATE foo")
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
# wait until received_lsn_lag is 0
def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60):
started_at = time.time()
received_lsn_lag = 1
while received_lsn_lag > 0:
elapsed = time.time() - started_at
if elapsed > timeout:
raise RuntimeError(
f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()")
with closing(pgmain.connect()) as conn:
with conn.cursor() as cur:
cur.execute('''
select pg_size_pretty(pg_cluster_size()),
pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag
FROM backpressure_lsns();
''')
res = cur.fetchone()
log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
received_lsn_lag = res[1]
time.sleep(polling_interval)
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_timeline_size_quota", "main")
client = env.pageserver.http_client()
res = client.branch_detail(env.initial_tenant, "test_timeline_size_quota")
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
pgmain = env.postgres.create_start(
"test_timeline_size_quota",
# Set small limit for the test
config_lines=['zenith.max_cluster_size=30MB'],
)
log.info("postgres is running on 'test_timeline_size_quota' branch")
with closing(pgmain.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE EXTENSION zenith") # TODO move it to zenith_fixtures?
cur.execute("CREATE TABLE foo (t text)")
wait_for_pageserver_catchup(pgmain)
# Insert many rows. This query must fail because of space limit
try:
cur.execute('''
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
''')
wait_for_pageserver_catchup(pgmain)
cur.execute('''
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 500000) g
''')
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
assert False
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
# drop table to free space
cur.execute('DROP TABLE foo')
wait_for_pageserver_catchup(pgmain)
# create it again and insert some rows. This query must succeed
cur.execute("CREATE TABLE foo (t text)")
cur.execute('''
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10000) g
''')
wait_for_pageserver_catchup(pgmain)
cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
pg_cluster_size = cur.fetchone()
log.info(f"pg_cluster_size = {pg_cluster_size}")

View File

@@ -3,13 +3,15 @@ import os
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test branching, when a transaction is in prepared state
#
def test_twophase(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_twophase", "empty")
env.zenith_cli(["branch", "test_twophase", "empty"])
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
log.info("postgres is running on 'test_twophase' branch")
@@ -56,7 +58,7 @@ def test_twophase(zenith_simple_env: ZenithEnv):
assert len(twophase_files) == 2
# Create a branch with the transaction in prepared state
env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")
env.zenith_cli(["branch", "test_twophase_prepared", "test_twophase"])
# Start compute on the new branch
pg2 = env.postgres.create_start(

View File

@@ -1,6 +1,8 @@
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
@@ -9,7 +11,8 @@ from fixtures.log_helper import log
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_vm_bit_clear", "empty"])
pg = env.postgres.create_start('test_vm_bit_clear')
log.info("postgres is running on 'test_vm_bit_clear' branch")
@@ -33,7 +36,7 @@ def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
# Branch at this point, to test that later
env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
env.zenith_cli(["branch", "test_vm_bit_clear_new", "test_vm_bit_clear"])
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server

View File

@@ -17,14 +17,16 @@ from fixtures.utils import lsn_to_hex, mkdir_if_needed
from fixtures.log_helper import log
from typing import List, Optional, Any
pytest_plugins = ("fixtures.zenith_fixtures")
# basic test, write something in setup with wal acceptors, ensure that commits
# succeed and data is written
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_wal_acceptors_normal_work", "main")
env.zenith_cli(["branch", "test_wal_acceptors_normal_work", "main"])
pg = env.postgres.create_start('test_wal_acceptors_normal_work')
@@ -51,7 +53,7 @@ class BranchMetrics:
# against different timelines.
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
n_timelines = 3
@@ -60,10 +62,10 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
# start postgres on each timeline
pgs = []
for branch in branches:
env.zenith_cli.create_branch(branch, "main")
env.zenith_cli(["branch", branch, "main"])
pgs.append(env.postgres.create_start(branch))
tenant_id = env.initial_tenant
tenant_id = uuid.UUID(env.initial_tenant)
def collect_metrics(message: str) -> List[BranchMetrics]:
with env.pageserver.http_client() as pageserver_http:
@@ -90,8 +92,8 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
latest_valid_lsn=branch_detail["latest_valid_lsn"],
)
for sk_m in sk_metrics:
m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex, timeline_id)])
m.commit_lsns.append(sk_m.commit_lsn_inexact[(tenant_id.hex, timeline_id)])
m.flush_lsns.append(sk_m.flush_lsn_inexact[timeline_id])
m.commit_lsns.append(sk_m.commit_lsn_inexact[timeline_id])
for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns):
# Invariant. May be < when transaction is in progress.
@@ -181,9 +183,9 @@ def test_restarts(zenith_env_builder: ZenithEnvBuilder):
n_acceptors = 3
zenith_env_builder.num_safekeepers = n_acceptors
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_wal_acceptors_restarts", "main")
env.zenith_cli(["branch", "test_wal_acceptors_restarts", "main"])
pg = env.postgres.create_start('test_wal_acceptors_restarts')
# we rely upon autocommit after each statement
@@ -218,9 +220,9 @@ def delayed_wal_acceptor_start(wa):
# When majority of acceptors is offline, commits are expected to be frozen
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 2
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_wal_acceptors_unavailability", "main")
env.zenith_cli(["branch", "test_wal_acceptors_unavailability", "main"])
pg = env.postgres.create_start('test_wal_acceptors_unavailability')
# we rely upon autocommit after each statement
@@ -289,9 +291,9 @@ def stop_value():
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_wal_acceptors_race_conditions", "main")
env.zenith_cli(["branch", "test_wal_acceptors_race_conditions", "main"])
pg = env.postgres.create_start('test_wal_acceptors_race_conditions')
# we rely upon autocommit after each statement
@@ -319,16 +321,16 @@ class ProposerPostgres(PgProtocol):
def __init__(self,
pgdata_dir: str,
pg_bin,
timeline_id: uuid.UUID,
tenant_id: uuid.UUID,
timeline_id: str,
tenant_id: str,
listen_addr: str,
port: int):
super().__init__(host=listen_addr, port=port, username='zenith_admin')
super().__init__(host=listen_addr, port=port)
self.pgdata_dir: str = pgdata_dir
self.pg_bin: PgBin = pg_bin
self.timeline_id: uuid.UUID = timeline_id
self.tenant_id: uuid.UUID = tenant_id
self.timeline_id: str = timeline_id
self.tenant_id: str = tenant_id
self.listen_addr: str = listen_addr
self.port: int = port
@@ -348,8 +350,8 @@ class ProposerPostgres(PgProtocol):
cfg = [
"synchronous_standby_names = 'walproposer'\n",
"shared_preload_libraries = 'zenith'\n",
f"zenith.zenith_timeline = '{self.timeline_id.hex}'\n",
f"zenith.zenith_tenant = '{self.tenant_id.hex}'\n",
f"zenith.zenith_timeline = '{self.timeline_id}'\n",
f"zenith.zenith_tenant = '{self.tenant_id}'\n",
f"zenith.page_server_connstring = ''\n",
f"wal_acceptors = '{wal_acceptors}'\n",
f"listen_addresses = '{self.listen_addr}'\n",
@@ -404,10 +406,10 @@ def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
# We don't really need the full environment for this test; the
# safekeepers alone are enough.
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
timeline_id = uuid.uuid4()
tenant_id = uuid.uuid4()
timeline_id = uuid.uuid4().hex
tenant_id = uuid.uuid4().hex
# write config for proposer
pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
@@ -454,9 +456,9 @@ def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_timeline_status", "main")
env.zenith_cli(["branch", "test_timeline_status", "main"])
pg = env.postgres.create_start('test_timeline_status')
wa = env.safekeepers[0]
@@ -493,15 +495,15 @@ class SafekeeperEnv:
self.bin_safekeeper = os.path.join(str(zenith_binpath), 'safekeeper')
self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None
self.postgres: Optional[ProposerPostgres] = None
self.tenant_id: Optional[uuid.UUID] = None
self.timeline_id: Optional[uuid.UUID] = None
self.tenant_id: Optional[str] = None
self.timeline_id: Optional[str] = None
def init(self) -> "SafekeeperEnv":
assert self.postgres is None, "postgres is already initialized"
assert self.safekeepers is None, "safekeepers are already initialized"
self.timeline_id = uuid.uuid4()
self.tenant_id = uuid.uuid4()
self.timeline_id = uuid.uuid4().hex
self.tenant_id = uuid.uuid4().hex
mkdir_if_needed(str(self.repo_dir))
# Create config and a Safekeeper object for each safekeeper
@@ -521,7 +523,12 @@ class SafekeeperEnv:
http=self.port_distributor.get_port(),
)
safekeeper_dir = os.path.join(self.repo_dir, f"sk{i}")
if self.num_safekeepers == 1:
name = "single"
else:
name = f"sk{i}"
safekeeper_dir = os.path.join(self.repo_dir, name)
mkdir_if_needed(safekeeper_dir)
args = [
@@ -532,8 +539,6 @@ class SafekeeperEnv:
f"127.0.0.1:{port.http}",
"-D",
safekeeper_dir,
"--id",
str(i),
"--daemonize"
]
@@ -601,8 +606,9 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
def safekeepers_guc(env: ZenithEnv, sk_names: List[int]) -> str:
return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names])
def safekeepers_guc(env: ZenithEnv, sk_names: List[str]) -> str:
return ','.join(
[f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.name in sk_names])
def execute_payload(pg: Postgres):
with closing(pg.connect()) as conn:
@@ -624,17 +630,17 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
http_cli = sk.http_client()
try:
status = http_cli.timeline_status(tenant_id, timeline_id)
log.info(f"Safekeeper {sk.id} status: {status}")
log.info(f"Safekeeper {sk.name} status: {status}")
except Exception as e:
log.info(f"Safekeeper {sk.id} status error: {e}")
log.info(f"Safekeeper {sk.name} status error: {e}")
zenith_env_builder.num_safekeepers = 4
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_replace_safekeeper", "main")
env = zenith_env_builder.init()
env.zenith_cli(["branch", "test_replace_safekeeper", "main"])
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
active_safekeepers = [1, 2, 3]
active_safekeepers = ['sk1', 'sk2', 'sk3']
pg = env.postgres.create('test_replace_safekeeper')
pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
pg.start()
@@ -674,7 +680,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
log.info("Recreate postgres to replace failed sk1 with new sk4")
pg.stop_and_destroy().create('test_replace_safekeeper')
active_safekeepers = [2, 3, 4]
active_safekeepers = ['sk2', 'sk3', 'sk4']
env.safekeepers[3].start()
pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
pg.start()

View File

@@ -9,6 +9,7 @@ from fixtures.utils import lsn_from_hex, lsn_to_hex
from typing import List
log = getLogger('root.wal_acceptor_async')
pytest_plugins = ("fixtures.zenith_fixtures")
class BankClient(object):
@@ -200,9 +201,9 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[Safekeeper], n_w
# restart acceptors one by one, while executing and validating bank transactions
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
env.zenith_cli.create_branch("test_wal_acceptors_restarts_under_load", "main")
env.zenith_cli(["branch", "test_wal_acceptors_restarts_under_load", "main"])
pg = env.postgres.create_start('test_wal_acceptors_restarts_under_load')
asyncio.run(run_restarts_under_load(pg, env.safekeepers))

View File

@@ -1,28 +1,31 @@
import json
import uuid
import requests
from psycopg2.extensions import cursor as PgCursor
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from fixtures.zenith_fixtures import ZenithEnv
from typing import cast
pytest_plugins = ("fixtures.zenith_fixtures")
def helper_compare_branch_list(pageserver_http_client: ZenithPageserverHttpClient,
env: ZenithEnv,
initial_tenant: uuid.UUID):
def helper_compare_branch_list(page_server_cur: PgCursor, env: ZenithEnv, initial_tenant: str):
"""
Compare branches list returned by CLI and directly via API.
Filters out branches created by other tests.
"""
branches = pageserver_http_client.branch_list(initial_tenant)
branches_api = sorted(map(lambda b: cast(str, b['name']), branches))
page_server_cur.execute(f'branch_list {initial_tenant}')
branches_api = sorted(
map(lambda b: cast(str, b['name']), json.loads(page_server_cur.fetchone()[0])))
branches_api = [b for b in branches_api if b.startswith('test_cli_') or b in ('empty', 'main')]
res = env.zenith_cli.list_branches()
res = env.zenith_cli(["branch"])
res.check_returncode()
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
branches_cli = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
res = env.zenith_cli.list_branches(tenant_id=initial_tenant)
res = env.zenith_cli(["branch", f"--tenantid={initial_tenant}"])
res.check_returncode()
branches_cli_with_tenant_arg = sorted(
map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
branches_cli_with_tenant_arg = [
@@ -34,20 +37,24 @@ def helper_compare_branch_list(pageserver_http_client: ZenithPageserverHttpClien
def test_cli_branch_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
pageserver_http_client = env.pageserver.http_client()
page_server_conn = env.pageserver.connect()
page_server_cur = page_server_conn.cursor()
# Initial sanity check
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
env.zenith_cli.create_branch("test_cli_branch_list_main", "empty")
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
# Create a branch for us
res = env.zenith_cli(["branch", "test_cli_branch_list_main", "empty"])
assert res.stderr == ''
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
# Create a nested branch
res = env.zenith_cli.create_branch("test_cli_branch_list_nested", "test_cli_branch_list_main")
res = env.zenith_cli(["branch", "test_cli_branch_list_nested", "test_cli_branch_list_main"])
assert res.stderr == ''
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
# Check that all new branches are visible via CLI
res = env.zenith_cli.list_branches()
res = env.zenith_cli(["branch"])
assert res.stderr == ''
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
@@ -55,11 +62,12 @@ def test_cli_branch_list(zenith_simple_env: ZenithEnv):
assert 'test_cli_branch_list_nested' in branches_cli
def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClient, env: ZenithEnv):
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
def helper_compare_tenant_list(page_server_cur: PgCursor, env: ZenithEnv):
page_server_cur.execute(f'tenant_list')
tenants_api = sorted(
map(lambda t: cast(str, t['id']), json.loads(page_server_cur.fetchone()[0])))
res = env.zenith_cli.list_tenants()
res = env.zenith_cli(["tenant", "list"])
assert res.stderr == ''
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
@@ -68,62 +76,32 @@ def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClien
def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
pageserver_http_client = env.pageserver.http_client()
page_server_conn = env.pageserver.connect()
page_server_cur = page_server_conn.cursor()
# Initial sanity check
helper_compare_tenant_list(pageserver_http_client, env)
helper_compare_tenant_list(page_server_cur, env)
# Create new tenant
tenant1 = uuid.uuid4()
env.zenith_cli.create_tenant(tenant1)
tenant1 = uuid.uuid4().hex
res = env.zenith_cli(["tenant", "create", tenant1])
res.check_returncode()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
helper_compare_tenant_list(page_server_cur, env)
# Create new tenant
tenant2 = uuid.uuid4()
env.zenith_cli.create_tenant(tenant2)
tenant2 = uuid.uuid4().hex
res = env.zenith_cli(["tenant", "create", tenant2])
res.check_returncode()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
helper_compare_tenant_list(page_server_cur, env)
res = env.zenith_cli.list_tenants()
res = env.zenith_cli(["tenant", "list"])
res.check_returncode()
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert env.initial_tenant.hex in tenants
assert tenant1.hex in tenants
assert tenant2.hex in tenants
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
# Start with single sk
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
# Connect to sk port on v4 loopback
res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
assert res.ok
# FIXME Test setup is using localhost:xx in ps config.
# Perhaps consider switching test suite to v4 loopback.
# Connect to ps port on v4 loopback
# res = requests.get(f'http://127.0.0.1:{env.pageserver.service_port.http}/v1/status')
# assert res.ok
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
# Start with single sk
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
# Stop default ps/sk
env.zenith_cli.pageserver_stop()
env.zenith_cli.safekeeper_stop()
# Default start
res = env.zenith_cli.raw_cli(["start"])
res.check_returncode()
# Default stop
res = env.zenith_cli.raw_cli(["stop"])
res.check_returncode()
assert env.initial_tenant in tenants
assert tenant1 in tenants
assert tenant2 in tenants

View File

@@ -3,11 +3,15 @@ import os
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, base_dir, pg_distrib_dir
pytest_plugins = ("fixtures.zenith_fixtures")
def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
env.zenith_cli.create_branch("test_isolation", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_isolation", "empty"])
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])

View File

@@ -3,11 +3,15 @@ import os
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
pytest_plugins = ("fixtures.zenith_fixtures")
def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin, capsys):
env = zenith_simple_env
env.zenith_cli.create_branch("test_pg_regress", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_pg_regress", "empty"])
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_pg_regress')
pg.safe_psql('CREATE DATABASE regression')

View File

@@ -7,11 +7,15 @@ from fixtures.zenith_fixtures import (ZenithEnv,
pg_distrib_dir)
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures")
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
env.zenith_cli.create_branch("test_zenith_regress", "empty")
# Create a branch for us
env.zenith_cli(["branch", "test_zenith_regress", "empty"])
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_zenith_regress')
pg.safe_psql('CREATE DATABASE regression')

View File

@@ -1,6 +1 @@
pytest_plugins = (
"fixtures.zenith_fixtures",
"fixtures.benchmark_fixture",
"fixtures.compare_fixtures",
"fixtures.slow",
)
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")

View File

@@ -8,7 +8,6 @@ import timeit
import calendar
import enum
from datetime import datetime
import uuid
import pytest
from _pytest.config import Config
from _pytest.terminal import TerminalReporter
@@ -27,6 +26,8 @@ benchmark, and then record the result by calling zenbenchmark.record. For example
import timeit
from fixtures.zenith_fixtures import ZenithEnv
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
def test_mybench(zenith_simple_env: env, zenbenchmark):
# Initialize the test
@@ -39,8 +40,6 @@ def test_mybench(zenith_simple_env: env, zenbenchmark):
# Record another measurement
zenbenchmark.record('speed_of_light', 300000, 'km/s')
There's no need to import this file to use it. It should be declared as a plugin
inside conftest.py, and that makes it available to all tests.
You can measure multiple things in one test, and record each one with a separate
call to zenbenchmark. For example, you could time the bulk loading that happens
@@ -277,11 +276,11 @@ class ZenithBenchmarker:
assert matches
return int(round(float(matches.group(1))))
def get_timeline_size(self, repo_dir: Path, tenantid: uuid.UUID, timelineid: str):
def get_timeline_size(self, repo_dir: Path, tenantid: str, timelineid: str):
"""
Calculate the on-disk size of a timeline
"""
path = "{}/tenants/{}/timelines/{}".format(repo_dir, tenantid.hex, timelineid)
path = "{}/tenants/{}/timelines/{}".format(repo_dir, tenantid, timelineid)
totalbytes = 0
for root, dirs, files in os.walk(path):

View File

@@ -1,200 +0,0 @@
import pytest
from contextlib import contextmanager
from abc import ABC, abstractmethod
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, ZenithEnv
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
# Type-related stuff
from typing import Iterator
class PgCompare(ABC):
"""Common interface of all postgres implementations, useful for benchmarks.
This class is a helper class for the zenith_with_baseline fixture. See its documentation
for more details.
"""
@property
@abstractmethod
def pg(self) -> PgProtocol:
pass
@property
@abstractmethod
def pg_bin(self) -> PgBin:
pass
@property
def zenbenchmark(self) -> ZenithBenchmarker:
pass
@abstractmethod
def flush(self) -> None:
pass
@abstractmethod
def report_peak_memory_use(self) -> None:
pass
@abstractmethod
def report_size(self) -> None:
pass
@contextmanager
@abstractmethod
def record_pageserver_writes(self, out_name):
pass
@contextmanager
@abstractmethod
def record_duration(self, out_name):
pass
class ZenithCompare(PgCompare):
"""PgCompare interface for the zenith stack."""
def __init__(self,
zenbenchmark: ZenithBenchmarker,
zenith_simple_env: ZenithEnv,
pg_bin: PgBin,
branch_name):
self.env = zenith_simple_env
self._zenbenchmark = zenbenchmark
self._pg_bin = pg_bin
# We only use one branch and one timeline
self.branch = branch_name
self.env.zenith_cli.create_branch(self.branch, "empty")
self._pg = self.env.postgres.create_start(self.branch)
self.timeline = self.pg.safe_psql("SHOW zenith.zenith_timeline")[0][0]
# Long-lived cursor, useful for flushing
self.psconn = self.env.pageserver.connect()
self.pscur = self.psconn.cursor()
@property
def pg(self):
return self._pg
@property
def zenbenchmark(self):
return self._zenbenchmark
@property
def pg_bin(self):
return self._pg_bin
def flush(self):
self.pscur.execute(f"do_gc {self.env.initial_tenant.hex} {self.timeline} 0")
def report_peak_memory_use(self) -> None:
self.zenbenchmark.record("peak_mem",
self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
def report_size(self) -> None:
timeline_size = self.zenbenchmark.get_timeline_size(self.env.repo_dir,
self.env.initial_tenant,
self.timeline)
self.zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
def record_pageserver_writes(self, out_name):
return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name)
def record_duration(self, out_name):
return self.zenbenchmark.record_duration(out_name)
class VanillaCompare(PgCompare):
"""PgCompare interface for vanilla postgres."""
def __init__(self, zenbenchmark, vanilla_pg: VanillaPostgres):
self._pg = vanilla_pg
self._zenbenchmark = zenbenchmark
vanilla_pg.configure(['shared_buffers=1MB'])
vanilla_pg.start()
# Long-lived cursor, useful for flushing
self.conn = self.pg.connect()
self.cur = self.conn.cursor()
@property
def pg(self):
return self._pg
@property
def zenbenchmark(self):
return self._zenbenchmark
@property
def pg_bin(self):
return self._pg.pg_bin
def flush(self):
self.cur.execute("checkpoint")
def report_peak_memory_use(self) -> None:
pass # TODO find something
def report_size(self) -> None:
data_size = self.pg.get_subdir_size('base')
self.zenbenchmark.record('data_size',
data_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
wal_size = self.pg.get_subdir_size('pg_wal')
self.zenbenchmark.record('wal_size',
wal_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
@contextmanager
def record_pageserver_writes(self, out_name):
yield # Do nothing
def record_duration(self, out_name):
return self.zenbenchmark.record_duration(out_name)
@pytest.fixture(scope='function')
def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
branch_name = request.node.name
return ZenithCompare(zenbenchmark, zenith_simple_env, pg_bin, branch_name)
@pytest.fixture(scope='function')
def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare:
return VanillaCompare(zenbenchmark, vanilla_pg)
@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
def zenith_with_baseline(request) -> PgCompare:
"""Parameterized fixture that helps compare zenith against vanilla postgres.
A test that uses this fixture turns into a parameterized test that runs against:
1. A vanilla postgres instance
2. A simple zenith env (see zenith_simple_env)
3. Possibly other postgres protocol implementations.
The main goal of this fixture is to make it easier for people to read and write
performance tests. Easy test writing leads to more tests.
Perfect encapsulation of the postgres implementations is **not** a goal because
it's impossible. Operational and configuration differences in the different
implementations sometimes matter, and the writer of the test should be mindful
of that.
If a test requires some one-off implementation-specific logic, using
isinstance(zenith_with_baseline, ZenithCompare) is encouraged. If that
implementation-specific logic proves useful across multiple tests, it might
make sense to add methods to the PgCompare class.
"""
fixture = request.getfixturevalue(request.param)
if isinstance(fixture, PgCompare):
return fixture
else:
raise AssertionError(f"test error: fixture {request.param} is not PgCompare")

View File

@@ -1,26 +0,0 @@
import pytest
"""
This plugin allows tests to be marked as slow using pytest.mark.slow. By default slow
tests are excluded. They need to be specifically requested with the --runslow flag in
order to run.
Copied from here: https://docs.pytest.org/en/latest/example/simple.html
"""
def pytest_addoption(parser):
parser.addoption("--runslow", action="store_true", default=False, help="run slow tests")
def pytest_configure(config):
config.addinivalue_line("markers", "slow: mark test as slow to run")
def pytest_collection_modifyitems(config, items):
if config.getoption("--runslow"):
# --runslow given in cli: do not skip slow tests
return
skip_slow = pytest.mark.skip(reason="need --runslow option to run")
for item in items:
if "slow" in item.keywords:
item.add_marker(skip_slow)
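As a usage sketch (the test below is hypothetical), a test opts into this behaviour with the standard pytest marker and is then skipped unless --runslow is passed:
import pytest
@pytest.mark.slow
def test_very_large_import():
    # Collected but skipped by default; runs only with `pytest --runslow`
    ...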

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from dataclasses import dataclass, field
import textwrap
from cached_property import cached_property
import asyncpg
import os
@@ -27,7 +26,7 @@ from dataclasses import dataclass
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union
from typing_extensions import Literal
import pytest
@@ -45,8 +44,9 @@ the standard pytest.fixture with some extra behavior.
There are several environment variables that can control the running of tests:
ZENITH_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
There's no need to import this file to use it. It should be declared as a plugin
inside conftest.py, and that makes it available to all tests.
To use fixtures in a test file, add this line of code:
>>> pytest_plugins = ("fixtures.zenith_fixtures")
Don't import functions from this file, or pytest will emit warnings. Instead
put directly-importable functions into utils.py or another separate file.
@@ -184,16 +184,6 @@ def worker_base_port(worker_seq_no: int):
return BASE_PORT + worker_seq_no * WORKER_PORT_NUM
def get_dir_size(path: str) -> int:
"""Return size in bytes."""
totalbytes = 0
for root, dirs, files in os.walk(path):
for name in files:
totalbytes += os.path.getsize(os.path.join(root, name))
return totalbytes
def can_bind(host: str, port: int) -> bool:
"""
Check whether a host:port is available to bind for listening
@@ -240,7 +230,7 @@ class PgProtocol:
def __init__(self, host: str, port: int, username: Optional[str] = None):
self.host = host
self.port = port
self.username = username
self.username = username or "zenith_admin"
def connstr(self,
*,
@@ -252,15 +242,10 @@ class PgProtocol:
"""
username = username or self.username
res = f'host={self.host} port={self.port} dbname={dbname}'
if username:
res = f'{res} user={username}'
if password:
res = f'{res} password={password}'
return res
res = f'host={self.host} port={self.port} user={username} dbname={dbname}'
if not password:
return res
return f'{res} password={password}'
# autocommit=True here by default because that's what we need most of the time
def connect(self,
@@ -425,14 +410,6 @@ class ZenithEnvBuilder:
self.env = ZenithEnv(self)
return self.env
def start(self):
self.env.start()
def init_start(self) -> ZenithEnv:
env = self.init()
self.start()
return env
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors if the pageserver already has some remote storage configuration, unless `force_enable` is set to `True`.
@@ -524,7 +501,6 @@ class ZenithEnv:
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.s3_mock_server
self.zenith_cli = ZenithCli(env=self)
self.postgres = PostgresFactory(self)
@@ -532,12 +508,12 @@ class ZenithEnv:
# generate initial tenant ID here instead of letting 'zenith init' generate it,
# so that we don't need to dig it out of the config file afterwards.
self.initial_tenant = uuid.uuid4()
self.initial_tenant = uuid.uuid4().hex
# Create a config file corresponding to the options
toml = textwrap.dedent(f"""
default_tenantid = '{self.initial_tenant.hex}'
""")
toml = f"""
default_tenantid = '{self.initial_tenant}'
"""
# Create config for pageserver
pageserver_port = PageserverPort(
@@ -546,13 +522,12 @@ class ZenithEnv:
)
pageserver_auth_type = "ZenithJWT" if config.pageserver_auth_enabled else "Trust"
toml += textwrap.dedent(f"""
[pageserver]
id=1
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
auth_type = '{pageserver_auth_type}'
""")
toml += f"""
[pageserver]
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
auth_type = '{pageserver_auth_type}'
"""
# Create a corresponding ZenithPageserver object
self.pageserver = ZenithPageserver(self,
@@ -565,22 +540,33 @@ class ZenithEnv:
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
id = i # assign ids sequentially
toml += textwrap.dedent(f"""
[[safekeepers]]
id = {id}
pg_port = {port.pg}
http_port = {port.http}
sync = false # Disable fsyncs to make the tests go faster
""")
safekeeper = Safekeeper(env=self, id=id, port=port)
if config.num_safekeepers == 1:
name = "single"
else:
name = f"sk{i}"
toml += f"""
[[safekeepers]]
name = '{name}'
pg_port = {port.pg}
http_port = {port.http}
sync = false # Disable fsyncs to make the tests go faster
"""
safekeeper = Safekeeper(env=self, name=name, port=port)
self.safekeepers.append(safekeeper)
log.info(f"Config: {toml}")
self.zenith_cli.init(toml)
# Run 'zenith init' using the config file we constructed
with tempfile.NamedTemporaryFile(mode='w+') as tmp:
tmp.write(toml)
tmp.flush()
cmd = ['init', f'--config={tmp.name}']
append_pageserver_param_overrides(cmd, config.pageserver_remote_storage)
self.zenith_cli(cmd)
def start(self):
# Start up the page server and all the safekeepers
self.pageserver.start()
@@ -591,12 +577,69 @@ class ZenithEnv:
""" Get list of safekeeper endpoints suitable for wal_acceptors GUC """
return ','.join([f'localhost:{wa.port.pg}' for wa in self.safekeepers])
def create_tenant(self, tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
def create_tenant(self, tenant_id: Optional[str] = None):
if tenant_id is None:
tenant_id = uuid.uuid4()
self.zenith_cli.create_tenant(tenant_id)
tenant_id = uuid.uuid4().hex
res = self.zenith_cli(['tenant', 'create', tenant_id])
res.check_returncode()
return tenant_id
def zenith_cli(self, arguments: List[str]) -> 'subprocess.CompletedProcess[str]':
"""
Run "zenith" with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.zenith_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
"""
assert type(arguments) == list
bin_zenith = os.path.join(str(zenith_binpath), 'zenith')
args = [bin_zenith] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.repo_dir}"')
env_vars = os.environ.copy()
env_vars['ZENITH_REPO_DIR'] = str(self.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.rust_log_override is not None:
env_vars['RUST_LOG'] = self.rust_log_override
# Pass coverage settings
var = 'LLVM_PROFILE_FILE'
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(args,
env=env_vars,
check=True,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
log.info(f"Run success: {res.stdout}")
except subprocess.CalledProcessError as exc:
# this way command output will be recorded and shown in the CI failure message
msg = f"""\
Run failed: {exc}
stdout: {exc.stdout}
stderr: {exc.stderr}
"""
log.info(msg)
raise Exception(msg) from exc
return res
@cached_property
def auth_keys(self) -> AuthKeys:
pub = (Path(self.repo_dir) / 'auth_public_key.pem').read_bytes()
@@ -621,10 +664,10 @@ def _shared_simple_env(request: Any, port_distributor) -> Iterator[ZenithEnv]:
with ZenithEnvBuilder(Path(repo_dir), port_distributor) as builder:
env = builder.init_start()
env = builder.init()
# For convenience in tests, create a branch from the freshly-initialized cluster.
env.zenith_cli.create_branch("empty", "main")
env.zenith_cli(["branch", "empty", "main"])
# Return the builder to the caller
yield env
@@ -655,7 +698,7 @@ def zenith_env_builder(test_output_dir, port_distributor) -> Iterator[ZenithEnvB
To use, define 'zenith_env_builder' fixture in your test to get access to the
builder object. Set properties on it to describe the environment.
Finally, initialize and start up the environment by calling
zenith_env_builder.init_start().
zenith_env_builder.init().
After the initialization, you can launch compute nodes by calling
the functions in the 'env.postgres' factory object, stop/start the
@@ -670,10 +713,6 @@ def zenith_env_builder(test_output_dir, port_distributor) -> Iterator[ZenithEnvB
yield builder
class ZenithPageserverApiException(Exception):
pass
class ZenithPageserverHttpClient(requests.Session):
def __init__(self, port: int, auth_token: Optional[str] = None) -> None:
super().__init__()
@@ -683,32 +722,22 @@ class ZenithPageserverHttpClient(requests.Session):
if auth_token is not None:
self.headers['Authorization'] = f'Bearer {auth_token}'
def verbose_error(self, res: requests.Response):
try:
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()['msg']
except:
msg = ''
raise ZenithPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
def timeline_attach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.post(
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/attach", )
self.verbose_error(res)
res.raise_for_status()
def timeline_detach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.post(
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/detach", )
self.verbose_error(res)
res.raise_for_status()
def branch_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}")
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, list)
return res_json
@@ -720,7 +749,7 @@ class ZenithPageserverHttpClient(requests.Session):
'name': name,
'start_point': start_point,
})
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
@@ -729,14 +758,14 @@ class ZenithPageserverHttpClient(requests.Session):
res = self.get(
f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}?include-non-incremental-logical-size=1",
)
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def tenant_list(self) -> List[Dict[Any, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant")
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, list)
return res_json
@@ -748,27 +777,27 @@ class ZenithPageserverHttpClient(requests.Session):
'tenant_id': tenant_id.hex,
},
)
self.verbose_error(res)
res.raise_for_status()
return res.json()
def timeline_list(self, tenant_id: uuid.UUID) -> List[str]:
res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}")
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.get(
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}")
self.verbose_error(res)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def get_metrics(self) -> str:
res = self.get(f"http://localhost:{self.port}/metrics")
self.verbose_error(res)
res.raise_for_status()
return res.text
@@ -795,190 +824,6 @@ class S3Storage:
RemoteStorage = Union[LocalFsStorage, S3Storage]
class ZenithCli:
"""
A typed wrapper around the `zenith` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
def __init__(self, env: ZenithEnv) -> None:
self.env = env
pass
def create_tenant(self, tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
if tenant_id is None:
tenant_id = uuid.uuid4()
self.raw_cli(['tenant', 'create', tenant_id.hex])
return tenant_id
def list_tenants(self) -> 'subprocess.CompletedProcess[str]':
return self.raw_cli(['tenant', 'list'])
def create_branch(self,
branch_name: str,
starting_point: str,
tenant_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]':
args = ['branch']
if tenant_id is not None:
args.extend(['--tenantid', tenant_id.hex])
args.extend([branch_name, starting_point])
return self.raw_cli(args)
def list_branches(self,
tenant_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]':
args = ['branch']
if tenant_id is not None:
args.extend(['--tenantid', tenant_id.hex])
return self.raw_cli(args)
def init(self, config_toml: str) -> 'subprocess.CompletedProcess[str]':
with tempfile.NamedTemporaryFile(mode='w+') as tmp:
tmp.write(config_toml)
tmp.flush()
cmd = ['init', f'--config={tmp.name}']
append_pageserver_param_overrides(cmd, self.env.pageserver.remote_storage)
return self.raw_cli(cmd)
def pageserver_start(self, overrides=()) -> 'subprocess.CompletedProcess[str]':
start_args = ['pageserver', 'start', *overrides]
append_pageserver_param_overrides(start_args, self.env.pageserver.remote_storage)
return self.raw_cli(start_args)
def pageserver_stop(self, immediate=False) -> 'subprocess.CompletedProcess[str]':
cmd = ['pageserver', 'stop']
if immediate:
cmd.extend(['-m', 'immediate'])
log.info(f"Stopping pageserver with {cmd}")
return self.raw_cli(cmd)
def safekeeper_start(self, id: int) -> 'subprocess.CompletedProcess[str]':
return self.raw_cli(['safekeeper', 'start', str(id)])
def safekeeper_stop(self,
id: Optional[int] = None,
immediate=False) -> 'subprocess.CompletedProcess[str]':
args = ['safekeeper', 'stop']
if id is not None:
args.extend(str(id))
if immediate:
args.extend(['-m', 'immediate'])
return self.raw_cli(args)
def pg_create(
self,
node_name: str,
tenant_id: Optional[uuid.UUID] = None,
timeline_spec: Optional[str] = None,
port: Optional[int] = None,
) -> 'subprocess.CompletedProcess[str]':
args = ['pg', 'create']
if tenant_id is not None:
args.extend(['--tenantid', tenant_id.hex])
if port is not None:
args.append(f'--port={port}')
args.append(node_name)
if timeline_spec is not None:
args.append(timeline_spec)
return self.raw_cli(args)
def pg_start(
self,
node_name: str,
tenant_id: Optional[uuid.UUID] = None,
timeline_spec: Optional[str] = None,
port: Optional[int] = None,
) -> 'subprocess.CompletedProcess[str]':
args = ['pg', 'start']
if tenant_id is not None:
args.extend(['--tenantid', tenant_id.hex])
if port is not None:
args.append(f'--port={port}')
args.append(node_name)
if timeline_spec is not None:
args.append(timeline_spec)
return self.raw_cli(args)
def pg_stop(
self,
node_name: str,
tenant_id: Optional[uuid.UUID] = None,
destroy=False,
) -> 'subprocess.CompletedProcess[str]':
args = ['pg', 'stop']
if tenant_id is not None:
args.extend(['--tenantid', tenant_id.hex])
if destroy:
args.append('--destroy')
args.append(node_name)
return self.raw_cli(args)
def raw_cli(self,
arguments: List[str],
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
"""
Run "zenith" with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.zenith_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
"""
assert type(arguments) == list
bin_zenith = os.path.join(str(zenith_binpath), 'zenith')
args = [bin_zenith] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.env.repo_dir}"')
env_vars = os.environ.copy()
env_vars['ZENITH_REPO_DIR'] = str(self.env.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars['RUST_LOG'] = self.env.rust_log_override
# Pass coverage settings
var = 'LLVM_PROFILE_FILE'
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(args,
env=env_vars,
check=True,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
log.info(f"Run success: {res.stdout}")
except subprocess.CalledProcessError as exc:
# this way command output will be recorded and shown in the CI failure message
msg = f"""\
Run failed: {exc}
stdout: {exc.stdout}
stderr: {exc.stderr}
"""
log.info(msg)
raise Exception(msg) from exc
if check_return_code:
res.check_returncode()
return res
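To make the typed-wrapper idea above concrete, a brief sketch of the two call styles side by side (the branch name is hypothetical; both methods are defined in the ZenithCli class above):
env.zenith_cli.create_branch("my_branch", "main")        # typed helper
env.zenith_cli.raw_cli(["branch", "my_branch", "main"])  # equivalent raw invocation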
class ZenithPageserver(PgProtocol):
"""
An object representing a running pageserver.
@@ -990,20 +835,23 @@ class ZenithPageserver(PgProtocol):
port: PageserverPort,
remote_storage: Optional[RemoteStorage] = None,
enable_auth=False):
super().__init__(host='localhost', port=port.pg, username='zenith_admin')
super().__init__(host='localhost', port=port.pg)
self.env = env
self.running = False
self.service_port = port # do not shadow PgProtocol.port which is just int
self.remote_storage = remote_storage
def start(self, overrides=()) -> 'ZenithPageserver':
def start(self) -> 'ZenithPageserver':
"""
Start the page server.
Returns self.
"""
assert self.running == False
self.env.zenith_cli.pageserver_start(overrides=overrides)
start_args = ['pageserver', 'start']
append_pageserver_param_overrides(start_args, self.remote_storage)
self.env.zenith_cli(start_args)
self.running = True
return self
@@ -1012,8 +860,13 @@ class ZenithPageserver(PgProtocol):
Stop the page server.
Returns self.
"""
cmd = ['pageserver', 'stop']
if immediate:
cmd.extend(['-m', 'immediate'])
log.info(f"Stopping pageserver with {cmd}")
if self.running:
self.env.zenith_cli.pageserver_stop(immediate)
self.env.zenith_cli(cmd)
self.running = False
return self
@@ -1120,54 +973,10 @@ def pg_bin(test_output_dir: str) -> PgBin:
return PgBin(test_output_dir)
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int):
super().__init__(host='localhost', port=port)
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
def configure(self, options: List[str]) -> None:
"""Append lines into postgresql.conf file."""
assert not self.running
with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
conf_file.writelines(options)
def start(self) -> None:
assert not self.running
self.running = True
self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'start'])
def stop(self) -> None:
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'stop'])
def get_subdir_size(self, subdir) -> int:
"""Return size of pgdatadir subdirectory in bytes."""
return get_dir_size(os.path.join(self.pgdatadir, subdir))
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
if self.running:
self.stop()
@pytest.fixture(scope='function')
def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]:
pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
pg_bin = PgBin(test_output_dir)
with VanillaPostgres(pgdatadir, pg_bin, 5432) as vanilla_pg:
yield vanilla_pg
class Postgres(PgProtocol):
""" An object representing a running postgres daemon. """
def __init__(self, env: ZenithEnv, tenant_id: uuid.UUID, port: int):
super().__init__(host='localhost', port=port, username='zenith_admin')
def __init__(self, env: ZenithEnv, tenant_id: str, port: int):
super().__init__(host='localhost', port=port)
self.env = env
self.running = False
@@ -1193,12 +1002,16 @@ class Postgres(PgProtocol):
if branch is None:
branch = node_name
self.env.zenith_cli.pg_create(node_name,
tenant_id=self.tenant_id,
port=self.port,
timeline_spec=branch)
self.env.zenith_cli([
'pg',
'create',
f'--tenantid={self.tenant_id}',
f'--port={self.port}',
node_name,
branch
])
self.node_name = node_name
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
if config_lines is None:
@@ -1217,9 +1030,8 @@ class Postgres(PgProtocol):
log.info(f"Starting postgres node {self.node_name}")
run_result = self.env.zenith_cli.pg_start(self.node_name,
tenant_id=self.tenant_id,
port=self.port)
run_result = self.env.zenith_cli(
['pg', 'start', f'--tenantid={self.tenant_id}', f'--port={self.port}', self.node_name])
self.running = True
log.info(f"stdout: {run_result.stdout}")
@@ -1229,7 +1041,7 @@ class Postgres(PgProtocol):
def pg_data_dir_path(self) -> str:
""" Path to data directory """
assert self.node_name
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
return os.path.join(self.env.repo_dir, path)
def pg_xact_dir_path(self) -> str:
@@ -1289,7 +1101,7 @@ class Postgres(PgProtocol):
if self.running:
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, tenant_id=self.tenant_id)
self.env.zenith_cli(['pg', 'stop', self.node_name, f'--tenantid={self.tenant_id}'])
self.running = False
return self
@@ -1301,7 +1113,8 @@ class Postgres(PgProtocol):
"""
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, destroy=True)
self.env.zenith_cli(
['pg', 'stop', '--destroy', self.node_name, f'--tenantid={self.tenant_id}'])
self.node_name = None
return self
@@ -1343,7 +1156,7 @@ class PostgresFactory:
def create_start(self,
node_name: str = "main",
branch: Optional[str] = None,
tenant_id: Optional[uuid.UUID] = None,
tenant_id: Optional[str] = None,
config_lines: Optional[List[str]] = None) -> Postgres:
pg = Postgres(
@@ -1363,7 +1176,7 @@ class PostgresFactory:
def create(self,
node_name: str = "main",
branch: Optional[str] = None,
tenant_id: Optional[uuid.UUID] = None,
tenant_id: Optional[str] = None,
config_lines: Optional[List[str]] = None) -> Postgres:
pg = Postgres(
@@ -1404,14 +1217,12 @@ class Safekeeper:
""" An object representing a running safekeeper daemon. """
env: ZenithEnv
port: SafekeeperPort
id: int
name: str # identifier for logging
auth_token: Optional[str] = None
running: bool = False
def start(self) -> 'Safekeeper':
assert self.running == False
self.env.zenith_cli.safekeeper_start(self.id)
self.running = True
self.env.zenith_cli(['safekeeper', 'start', self.name])
# wait for the wal acceptor to start by checking its status
started_at = time.time()
while True:
@@ -1429,14 +1240,16 @@ class Safekeeper:
return self
def stop(self, immediate=False) -> 'Safekeeper':
log.info('Stopping safekeeper {}'.format(self.id))
self.env.zenith_cli.safekeeper_stop(self.id, immediate)
self.running = False
cmd = ['safekeeper', 'stop']
if immediate:
cmd.extend(['-m', 'immediate'])
cmd.append(self.name)
log.info('Stopping safekeeper {}'.format(self.name))
self.env.zenith_cli(cmd)
return self
def append_logical_message(self,
tenant_id: uuid.UUID,
timeline_id: uuid.UUID,
def append_logical_message(self, tenant_id: str, timeline_id: str,
request: Dict[str, Any]) -> Dict[str, Any]:
"""
Send JSON_CTRL query to append LogicalMessage to WAL and modify
@@ -1446,7 +1259,7 @@ class Safekeeper:
# "replication=0" hacks psycopg not to send additional queries
# on startup, see https://github.com/psycopg/psycopg2/pull/482
connstr = f"host=localhost port={self.port.pg} replication=0 options='-c ztimelineid={timeline_id.hex} ztenantid={tenant_id.hex}'"
connstr = f"host=localhost port={self.port.pg} replication=0 options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
with closing(psycopg2.connect(connstr)) as conn:
# server doesn't support transactions
@@ -1475,8 +1288,8 @@ class SafekeeperTimelineStatus:
class SafekeeperMetrics:
# These are metrics from Prometheus which uses float64 internally.
# As a consequence, values may differ from real original int64s.
flush_lsn_inexact: Dict[Tuple[str, str], int] = field(default_factory=dict)
commit_lsn_inexact: Dict[Tuple[str, str], int] = field(default_factory=dict)
flush_lsn_inexact: Dict[str, int] = field(default_factory=dict)
commit_lsn_inexact: Dict[str, int] = field(default_factory=dict)
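As an aside on why these fields are called inexact (not part of the test code itself): Prometheus text exposition renders every sample as a float64, which cannot represent all int64 LSNs exactly once they exceed 2**53. A minimal illustration:
lsn = 2**60 + 1
assert float(lsn) == 2**60          # rounded to the nearest representable float64
assert int(float(lsn)) != lsn       # so the parsed metric may be slightly off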
class SafekeeperHttpClient(requests.Session):
@@ -1500,16 +1313,14 @@ class SafekeeperHttpClient(requests.Session):
all_metrics_text = request_result.text
metrics = SafekeeperMetrics()
for match in re.finditer(
r'^safekeeper_flush_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$',
all_metrics_text,
re.MULTILINE):
metrics.flush_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3))
for match in re.finditer(
r'^safekeeper_commit_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$',
all_metrics_text,
re.MULTILINE):
metrics.commit_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3))
for match in re.finditer(r'^safekeeper_flush_lsn{ztli="([0-9a-f]+)"} (\S+)$',
all_metrics_text,
re.MULTILINE):
metrics.flush_lsn_inexact[match.group(1)] = int(match.group(2))
for match in re.finditer(r'^safekeeper_commit_lsn{ztli="([0-9a-f]+)"} (\S+)$',
all_metrics_text,
re.MULTILINE):
metrics.commit_lsn_inexact[match.group(1)] = int(match.group(2))
return metrics
@@ -1618,7 +1429,7 @@ def check_restored_datadir_content(test_output_dir: str, env: ZenithEnv, pg: Pos
{psql_path} \
--no-psqlrc \
postgres://localhost:{env.pageserver.service_port.pg} \
-c 'basebackup {pg.tenant_id.hex} {timeline}' \
-c 'basebackup {pg.tenant_id} {timeline}' \
| tar -x -C {restored_dir_path}
"""

View File

@@ -2,7 +2,8 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
#
@@ -15,19 +16,47 @@ from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
# 3. Disk space used
# 4. Peak memory usage
#
def test_bulk_insert(zenith_with_baseline: PgCompare):
env = zenith_with_baseline
def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_bulk_insert", "empty"])
pg = env.postgres.create_start('test_bulk_insert')
log.info("postgres is running on 'test_bulk_insert' branch")
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
# Get the timeline ID of our branch. We need it for the 'do_gc' command
with closing(env.pg.connect()) as conn:
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
cur.execute("create table huge (i int, j int);")
# Run INSERT, recording the time and I/O it takes
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('insert'):
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
with zenbenchmark.record_duration('insert'):
cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
env.flush()
env.report_peak_memory_use()
env.report_size()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# Record peak memory usage
zenbenchmark.record("peak_mem",
zenbenchmark.get_peak_mem(env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
env.initial_tenant,
timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)

View File

@@ -4,6 +4,8 @@ import pytest
from fixtures.zenith_fixtures import ZenithEnvBuilder
pytest_plugins = ("fixtures.benchmark_fixture")
# Run bulk tenant creation test.
#
# Collects metrics:
@@ -23,7 +25,7 @@ def test_bulk_tenant_create(
"""Measure tenant creation time (with and without wal acceptors)"""
if use_wal_acceptors == 'with_wa':
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = zenith_env_builder.init()
time_slices = []
@@ -31,10 +33,12 @@ def test_bulk_tenant_create(
start = timeit.default_timer()
tenant = env.create_tenant()
env.zenith_cli.create_branch(
env.zenith_cli([
"branch",
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
"main",
tenant_id=tenant)
f"--tenantid={tenant}"
])
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
#if use_wal_acceptors == 'with_wa':

View File

@@ -2,10 +2,11 @@ from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
from io import BufferedReader, RawIOBase
from itertools import repeat
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
class CopyTestData(RawIOBase):
def __init__(self, rows: int):
@@ -41,41 +42,77 @@ def copy_test_data(rows: int):
#
# COPY performance tests.
#
def test_copy(zenith_with_baseline: PgCompare):
env = zenith_with_baseline
def test_copy(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_copy", "empty"])
pg = env.postgres.create_start('test_copy')
log.info("postgres is running on 'test_copy' branch")
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
# Get the timeline ID of our branch. We need it for the pageserver 'checkpoint' command
with closing(env.pg.connect()) as conn:
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
cur.execute("create table copytest (i int, t text);")
# Load data with COPY, recording the time and I/O it takes.
#
# Since there's no data in the table previously, this extends it.
with env.record_pageserver_writes('copy_extend_pageserver_writes'):
with env.record_duration('copy_extend'):
with zenbenchmark.record_pageserver_writes(env.pageserver,
'copy_extend_pageserver_writes'):
with zenbenchmark.record_duration('copy_extend'):
cur.copy_from(copy_test_data(1000000), 'copytest')
env.flush()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
# Delete most rows, and VACUUM to make the space available for reuse.
with env.record_pageserver_writes('delete_pageserver_writes'):
with env.record_duration('delete'):
with zenbenchmark.record_pageserver_writes(env.pageserver, 'delete_pageserver_writes'):
with zenbenchmark.record_duration('delete'):
cur.execute("delete from copytest where i % 100 <> 0;")
env.flush()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
with env.record_pageserver_writes('vacuum_pageserver_writes'):
with env.record_duration('vacuum'):
with zenbenchmark.record_pageserver_writes(env.pageserver, 'vacuum_pageserver_writes'):
with zenbenchmark.record_duration('vacuum'):
cur.execute("vacuum copytest")
env.flush()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
# Load data into the table again. This time, it will use the space freed
# by the VACUUM.
#
# This will also clear all the VM bits.
with env.record_pageserver_writes('copy_reuse_pageserver_writes'):
with env.record_duration('copy_reuse'):
with zenbenchmark.record_pageserver_writes(env.pageserver,
'copy_reuse_pageserver_writes'):
with zenbenchmark.record_duration('copy_reuse'):
cur.copy_from(copy_test_data(1000000), 'copytest')
env.flush()
env.report_peak_memory_use()
env.report_size()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
# Record peak memory usage
zenbenchmark.record("peak_mem",
zenbenchmark.get_peak_mem(env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
env.initial_tenant,
timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)

View File

@@ -2,20 +2,34 @@ import os
from contextlib import closing
from fixtures.benchmark_fixture import MetricReport
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
#
# Test buffering GisT build. It WAL-logs the whole relation, in 32-page chunks.
# As of this writing, we duplicate those giant WAL records for each page,
# which makes the delta layer about 32x larger than it needs to be.
#
def test_gist_buffering_build(zenith_with_baseline: PgCompare):
env = zenith_with_baseline
def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_gist_buffering_build", "empty"])
with closing(env.pg.connect()) as conn:
pg = env.postgres.create_start('test_gist_buffering_build')
log.info("postgres is running on 'test_gist_buffering_build' branch")
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
# Get the timeline ID of our branch. We need it for the 'do_gc' command
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
# Create test table.
cur.execute("create table gist_point_tbl(id int4, p point)")
@@ -24,12 +38,27 @@ def test_gist_buffering_build(zenith_with_baseline: PgCompare):
)
# Build the index.
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('build'):
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
with zenbenchmark.record_duration('build'):
cur.execute(
"create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
)
env.flush()
env.report_peak_memory_use()
env.report_size()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 1000000")
# Record peak memory usage
zenbenchmark.record("peak_mem",
zenbenchmark.get_peak_mem(env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
env.initial_tenant,
timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)

View File

@@ -1,10 +1,11 @@
from io import BytesIO
import asyncio
import asyncpg
from fixtures.zenith_fixtures import ZenithEnv, Postgres, PgProtocol
from fixtures.zenith_fixtures import ZenithEnv, Postgres
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
async def repeat_bytes(buf, repetitions: int):
@@ -12,7 +13,7 @@ async def repeat_bytes(buf, repetitions: int):
yield buf
async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str):
buf = BytesIO()
for i in range(1000):
buf.write(
@@ -25,7 +26,7 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st
await pg_conn.copy_to_table(table_name, source=copy_input)
async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int):
async def parallel_load_different_tables(pg: Postgres, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, f'copytest_{worker_id}')
@@ -36,25 +37,54 @@ async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int):
# Load 5 different tables in parallel with COPY TO
def test_parallel_copy_different_tables(zenith_with_baseline: PgCompare, n_parallel=5):
def test_parallel_copy_different_tables(zenith_simple_env: ZenithEnv,
zenbenchmark: ZenithBenchmarker,
n_parallel=5):
env = zenith_with_baseline
conn = env.pg.connect()
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_parallel_copy_different_tables", "empty"])
pg = env.postgres.create_start('test_parallel_copy_different_tables')
log.info("postgres is running on 'test_parallel_copy_different_tables' branch")
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
# Get the timeline ID of our branch. We need it for the 'do_gc' command
conn = pg.connect()
cur = conn.cursor()
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
for worker_id in range(n_parallel):
cur.execute(f'CREATE TABLE copytest_{worker_id} (i int, t text)')
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('load'):
asyncio.run(parallel_load_different_tables(env.pg, n_parallel))
env.flush()
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
with zenbenchmark.record_duration('load'):
asyncio.run(parallel_load_different_tables(pg, n_parallel))
env.report_peak_memory_use()
env.report_size()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# Record peak memory usage
zenbenchmark.record("peak_mem",
zenbenchmark.get_peak_mem(env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
async def parallel_load_same_table(pg: PgProtocol, n_parallel: int):
async def parallel_load_same_table(pg: Postgres, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, f'copytest')
@@ -65,17 +95,46 @@ async def parallel_load_same_table(pg: PgProtocol, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy_same_table(zenith_with_baseline: PgCompare, n_parallel=5):
env = zenith_with_baseline
conn = env.pg.connect()
def test_parallel_copy_same_table(zenith_simple_env: ZenithEnv,
zenbenchmark: ZenithBenchmarker,
n_parallel=5):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_parallel_copy_same_table", "empty"])
pg = env.postgres.create_start('test_parallel_copy_same_table')
log.info("postgres is running on 'test_parallel_copy_same_table' branch")
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
# Get the timeline ID of our branch. We need it for the 'do_gc' command
conn = pg.connect()
cur = conn.cursor()
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
cur.execute(f'CREATE TABLE copytest (i int, t text)')
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('load'):
asyncio.run(parallel_load_same_table(env.pg, n_parallel))
env.flush()
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
with zenbenchmark.record_duration('load'):
asyncio.run(parallel_load_same_table(pg, n_parallel))
env.report_peak_memory_use()
env.report_size()
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# Record peak memory usage
zenbenchmark.record("peak_mem",
zenbenchmark.get_peak_mem(env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
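As an aside (not part of the diff): the asyncpg COPY pattern used by copy_test_data_to_table above, reduced to a self-contained sketch. The table name copytest_demo and the DSN are placeholders; feeding copy_to_table() an async iterable of bytes is standard asyncpg behaviour.
# Hedged standalone sketch of the COPY-via-async-iterable pattern shown above.
import asyncio
import asyncpg

async def row_chunks(n_rows: int):
    # Async iterable of bytes; asyncpg streams these into COPY ... FROM STDIN
    # using the text format (tab-delimited columns, newline-terminated rows).
    for i in range(n_rows):
        yield f"{i}\tsome text\n".encode()

async def load(dsn: str):
    conn = await asyncpg.connect(dsn)
    try:
        await conn.execute("CREATE TABLE IF NOT EXISTS copytest_demo (i int, t text)")
        await conn.copy_to_table("copytest_demo", source=row_chunks(1000))
    finally:
        await conn.close()

# asyncio.run(load("postgresql://localhost/postgres"))  # placeholder DSN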

View File

@@ -1,10 +1,11 @@
from contextlib import closing
from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
from fixtures.zenith_fixtures import PgBin, ZenithEnv
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.log_helper import log
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
#
# Run a very short pgbench test.
@@ -15,16 +16,47 @@ from fixtures.log_helper import log
# 2. Time to run 5000 pgbench transactions
# 3. Disk space used
#
def test_pgbench(zenith_with_baseline: PgCompare):
env = zenith_with_baseline
def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: ZenithBenchmarker):
env = zenith_simple_env
# Create a branch for us
env.zenith_cli(["branch", "test_pgbench_perf", "empty"])
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('init'):
env.pg_bin.run_capture(['pgbench', '-s5', '-i', env.pg.connstr()])
env.flush()
pg = env.postgres.create_start('test_pgbench_perf')
log.info("postgres is running on 'test_pgbench_perf' branch")
with env.record_duration('5000_xacts'):
env.pg_bin.run_capture(['pgbench', '-c1', '-t5000', env.pg.connstr()])
env.flush()
# Open a connection directly to the page server that we'll use to force
# flushing the layers to disk
psconn = env.pageserver.connect()
pscur = psconn.cursor()
env.report_size()
# Get the timeline ID of our branch. We need it for the 'do_gc' command
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
connstr = pg.connstr()
# Initialize pgbench database, recording the time and I/O it takes
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
with zenbenchmark.record_duration('init'):
pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
# Flush the layers from memory to disk. This is included in the reported
# time and I/O
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# Run pgbench for 5000 transactions
with zenbenchmark.record_duration('5000_xacts'):
pg_bin.run_capture(['pgbench', '-c1', '-t5000', connstr])
# Flush the layers to disk again. This is *not* included in the reported time,
# though.
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
# Report disk space used by the repository
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
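As an aside (not part of the diff): the "connect to the pageserver, look up the timeline, run do_gc" steps repeated in the tests above could be folded into one small helper. flush_layers is a hypothetical name; the calls inside it are the same ones the diff already uses, and closing comes from contextlib as imported above.
# Hypothetical helper (a sketch, not code from this diff) consolidating the
# repeated "force layer flush" pattern used by the tests above.
def flush_layers(env, pg, gc_horizon: int = 0):
    # Look up the timeline the compute node is running on; do_gc needs it.
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("SHOW zenith.zenith_timeline")
            timeline = cur.fetchone()[0]
    # Ask the pageserver to flush in-memory layers to disk (and run GC).
    psconn = env.pageserver.connect()
    pscur = psconn.cursor()
    pscur.execute(f"do_gc {env.initial_tenant} {timeline} {gc_horizon}")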

View File

@@ -9,6 +9,8 @@ import calendar
import timeit
import os
pytest_plugins = ("fixtures.benchmark_fixture", )
def utc_now_timestamp() -> int:
return calendar.timegm(datetime.utcnow().utctimetuple())

View File

@@ -1,79 +0,0 @@
import os
from contextlib import closing
from fixtures.benchmark_fixture import MetricReport
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
from fixtures.log_helper import log
import psycopg2.extras
import random
import time
from fixtures.utils import print_gc_result
# This is a clear-box test that demonstrates the worst case scenario for the
# "1 segment per layer" implementation of the pageserver. It writes to random
# rows, while almost never writing to the same segment twice before flushing.
# A naive pageserver implementation would create a full image layer for each
# dirty segment, leading to write_amplification = segment_size / page_size,
# when compared to vanilla postgres. With segment_size = 10MB, that's 1250.
def test_random_writes(zenith_with_baseline: PgCompare):
env = zenith_with_baseline
# Number of rows in the test database. 1M rows runs quickly, but implies
# a small effective_checkpoint_distance, which makes the test less realistic.
# Using a 300 TB database would imply a 250 MB effective_checkpoint_distance,
# but it will take a very long time to run. From what I've seen so far,
# increasing n_rows doesn't have impact on the (zenith_runtime / vanilla_runtime)
# performance ratio.
n_rows = 1 * 1000 * 1000 # around 36 MB table
# Number of writes per 3 segments. A value of 1 should produce a random
# workload where we almost never write to the same segment twice. Larger
# values of load_factor produce a larger effective_checkpoint_distance,
# making the test more realistic, but less effective. If you want a realistic
# worst case scenario and you have time to wait you should increase n_rows instead.
load_factor = 1
# Not sure why but this matters in a weird way (up to 2x difference in perf).
# TODO look into it
n_iterations = 1
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
# Create the test table
with env.record_duration('init'):
cur.execute("""
CREATE TABLE Big(
pk integer primary key,
count integer default 0
);
""")
cur.execute(f"INSERT INTO Big (pk) values (generate_series(1,{n_rows}))")
# Get table size (can't be predicted because padding and alignment)
cur.execute("SELECT pg_relation_size('Big');")
row = cur.fetchone()
table_size = row[0]
env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)
# Decide how much to write, based on knowledge of pageserver implementation.
# Avoiding segment collisions maximizes (zenith_runtime / vanilla_runtime).
segment_size = 10 * 1024 * 1024
n_segments = table_size // segment_size
n_writes = load_factor * n_segments // 3
# The closer this is to 250 MB, the more realistic the test is.
effective_checkpoint_distance = table_size * n_writes // n_rows
env.zenbenchmark.record("effective_checkpoint_distance",
effective_checkpoint_distance,
'bytes',
MetricReport.TEST_PARAM)
# Update random keys
with env.record_duration('run'):
for it in range(n_iterations):
for i in range(n_writes):
key = random.randint(1, n_rows)
cur.execute(f"update Big set count=count+1 where pk={key}")
env.flush()
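As an aside (not part of the diff): the arithmetic behind the comments in this file, worked through with the same numbers the test uses. The 8 KiB page size is an assumption of the sketch; the test itself never states it.
# Hedged arithmetic check of the write-amplification claim above.
page_size = 8 * 1024                       # assumed default Postgres page size
segment_size = 10 * 1024 * 1024            # same value the test uses
write_amplification = segment_size // page_size
print(write_amplification)                 # 1280, i.e. in the ballpark of the "1250" quoted above

# effective_checkpoint_distance = table_size * n_writes // n_rows; with
# load_factor = 1 and a fixed row width it grows linearly with table_size,
# which is why the comment says a 300 TB database would be needed to reach a
# realistic ~250 MB.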

Some files were not shown because too many files have changed in this diff.