Compare commits


37 Commits

Author SHA1 Message Date
Bojan Serafimov
13cddbb10d WIP 2022-06-15 18:19:21 -04:00
Anastasia Lubennikova
d11c9f9fcb Use random ports for the proxy and local pg in tests
Fixes #1931
Author: Dmitry Ivanov
2022-06-15 20:21:58 +03:00
Kirill Bulatov
d8a37452c8 Rename ZenithFeedback (#1912) 2022-06-11 00:44:05 +03:00
chaitanya sharma
e1336f451d renamed .zenith data-dir to .neon. 2022-06-09 18:19:18 +02:00
Arseny Sher
a4d8261390 Save Postgres log in test_find_end_of_wal_* tests. 2022-06-09 19:16:43 +04:00
Egor Suvorov
e2a5a31595 Safekeeper HTTP router: add comment about /v1/timeline 2022-06-09 17:14:46 +02:00
Egor Suvorov
0ac0fba77a test_runner: test Safekeeper HTTP API Auth
All endpoints except POST /v1/timeline are tested; that one is not tested in any way yet.
Three attempts are made for each endpoint: correctly authenticated, badly authenticated, and unauthenticated.
2022-06-09 17:14:46 +02:00
Egor Suvorov
a001052cdd test_runner: SafekeeperHttpClient: support auth 2022-06-09 17:14:46 +02:00
Egor Suvorov
1f1d852204 ZenithEnvBuilder: rename pageserver_auth_enabled --> auth_enabled 2022-06-09 17:14:46 +02:00
Egor Suvorov
f7b878611a Implement JWT authentication in Safekeeper HTTP API (#1753)
* `control_plane` crate (used by `neon_local`) now parses an `auth_enabled` bool for each Safekeeper
* If auth is enabled, a Safekeeper is passed a path to a public key via a new command line argument
* Added TODO comments to other places needing auth
2022-06-09 17:14:46 +02:00
Arseny Sher
a51b2dac9a Don't s3 offload from newly joined safekeeper not having required WAL.
I made the check at the launcher level, with a view to eventually moving the
election (the decision about who offloads) there.

Also log timeline 'active' changes.
2022-06-09 18:30:16 +04:00
Thang Pham
e22d9cee3a fix ZeroDivisionError in scripts/generate_perf_report_page (#1906)
Fixes the `ZeroDivisionError` by adding `EPS=1e-6` when doing the calculation.
2022-06-08 09:15:12 -04:00
Arthur Petukhovsky
a01999bc4a Replace most common remote logs with metrics (#1909) 2022-06-08 13:36:49 +03:00
chaitanya sharma
32e64afd54 Use better parallel build instructions in readme.md (#1908) 2022-06-08 11:25:37 +03:00
Kirill Bulatov
8a53472e4f Force etcd broker keys not to intersect 2022-06-08 11:21:05 +03:00
Dmitry Rodionov
6e26588d17 Allow customizing the shutdown condition in PostgresBackend
Use it in PageServerHandler to check the per-thread shutdown condition
from thread_mgr, which takes tenants and timelines into account
2022-06-07 22:11:54 +03:00
Arseny Sher
0b93253b3c Fix leaked keepalive task in s3 offloading leader election.
I still don't like the surroundings and feel we'd be better off not using the
election API at all, but this is a quick fix to keep CI green.

ref #1815
2022-06-07 15:17:57 +04:00
Dmitry Rodionov
7dc6beacbd make it possible to associate thread with a tenant after thread start 2022-06-07 12:59:35 +03:00
Thang Pham
6cfebc096f Add read/write throughput performance tests (#1883)
Part of #1467 

This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) obtained when running PG benchmarks against Neon and vanilla PG to measure the read/write throughput of the DB.
2022-06-06 12:32:10 -04:00
KlimentSerafimov
fecad1ca34 Resolving issue #1745. Added cluster option for SNI data (#1813)
* Added project option in case SNI data is missing. Resolving issue #1745.

* Added invariant checking for project name: if both sni_data and project_name are available then they should match.
2022-06-06 08:14:41 -04:00
bojanserafimov
92de8423af Remove dead code (#1886) 2022-06-05 09:18:11 -04:00
Dmitry Rodionov
e442f5357b unify two identical failpoints in flush_frozen_layer
probably a merge artifact
2022-06-03 19:36:09 +03:00
Arseny Sher
5a723d44cd Parametrize test_normal_work.
I like to run a small test locally, but let's avoid duplication.
2022-06-03 20:32:53 +04:00
Kirill Bulatov
2623193876 Remove pageserver_connstr from WAL stream logic 2022-06-03 17:30:36 +03:00
Arseny Sher
70a53c4b03 Get backup test_safekeeper_normal_work, but skip by default.
It is handy for development.
2022-06-03 16:12:14 +04:00
Arseny Sher
9e108102b3 Silence etcd safekeeper info key parse errors.
When we subscribe to everything, it is OK to receive updates other than safekeeper
timeline updates.
2022-06-03 16:12:14 +04:00
huming
9c846a93e8 chore(doc) 2022-06-03 14:24:27 +03:00
Kirill Bulatov
c5007d3916 Remove unused module 2022-06-03 00:23:13 +03:00
Kirill Bulatov
5b06599770 Simplify etcd key regex parsing 2022-06-03 00:23:13 +03:00
Kirill Bulatov
1d16ee92d4 Fix the Lsn difference reconnection 2022-06-03 00:23:13 +03:00
Kirill Bulatov
7933804284 Fix and test regex parsing 2022-06-03 00:23:13 +03:00
Kirill Bulatov
a91e0c299d Reproduce etcd parsing bug in Python tests 2022-06-03 00:23:13 +03:00
Kirill Bulatov
b0c4ec0594 Log storage sync and etcd events a bit better 2022-06-03 00:23:13 +03:00
bojanserafimov
90e2c9ee1f Rename zenith to neon in python tests (#1871) 2022-06-02 16:21:28 -04:00
Egor Suvorov
aba5e5f8b5 GitHub Actions: pin Rust version to 1.58 like on CircleCI
* Fix failing `cargo clippy` while we're here.
  The behavior has been changed in Rust 1.60: https://github.com/rust-lang/rust-clippy/issues/8928
* Add Rust version to the Cargo deps cache key
2022-06-02 17:45:53 +02:00
Dmitry Rodionov
b155fe0e2f avoid perf test result context for pg regress 2022-06-02 17:41:34 +03:00
Ryan Russell
c71faae2c6 Docs readability cont
Signed-off-by: Ryan Russell <git@ryanrussell.org>
2022-06-02 15:05:12 +02:00
128 changed files with 2098 additions and 2244 deletions

View File

@@ -57,7 +57,7 @@
args:
creates: "/storage/pageserver/data/tenants"
environment:
ZENITH_REPO_DIR: "/storage/pageserver/data"
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
@@ -131,7 +131,7 @@
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
ZENITH_REPO_DIR: "/storage/safekeeper/data"
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -750,7 +750,6 @@ workflows:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
context: PERF_TEST_RESULT_CONNSTR
matrix:
parameters:
build_type: ["debug", "release"]

View File

@@ -9,8 +9,8 @@ tmp_install
tmp_check_cli
test_output
.vscode
.zenith
integration_tests/.zenith
.neon
integration_tests/.neon
.mypy_cache
Dockerfile

View File

@@ -12,7 +12,7 @@ jobs:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 30
name: run regression test suite
@@ -87,7 +87,7 @@ jobs:
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh

5
.gitignore vendored
View File

@@ -5,8 +5,9 @@
__pycache__/
test_output/
.vscode
/.zenith
/integration_tests/.zenith
.idea
/.neon
/integration_tests/.neon
# Coverage
*.profraw

View File

@@ -6,5 +6,5 @@ target/
tmp_install/
__pycache__/
test_output/
.zenith/
.neon/
.git/

1
Cargo.lock generated
View File

@@ -811,6 +811,7 @@ name = "etcd_broker"
version = "0.1.0"
dependencies = [
"etcd-client",
"once_cell",
"regex",
"serde",
"serde_json",

View File

@@ -80,7 +80,7 @@ brew link --force libpq
```sh
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
make -j5
make -j`nproc`
```
#### dependency installation notes
@@ -93,7 +93,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
#### running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .zenith with proper paths to binaries and data
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -103,16 +103,16 @@ pageserver init succeeded
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting pageserver at '127.0.0.1:64000' in '.zenith'
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
initializing for sk 1 for 7676
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances
@@ -149,7 +149,7 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances

View File

@@ -21,9 +21,9 @@ use utils::{
use crate::safekeeper::SafekeeperNode;
//
// This data structures represents zenith CLI config
// This data structures represents neon_local CLI config
//
// It is deserialized from the .zenith/config file, or the config file passed
// It is deserialized from the .neon/config file, or the config file passed
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
@@ -34,8 +34,8 @@ pub struct LocalEnv {
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
// '.zenith' if not given.
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
@@ -177,6 +177,7 @@ pub struct SafekeeperConf {
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
}
impl Default for SafekeeperConf {
@@ -188,6 +189,7 @@ impl Default for SafekeeperConf {
sync: true,
remote_storage: None,
backup_threads: None,
auth_enabled: false,
}
}
}
@@ -337,7 +339,7 @@ impl LocalEnv {
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeper node. It is read by the 'zenith' command-line
@@ -481,9 +483,9 @@ impl LocalEnv {
}
fn base_path() -> PathBuf {
match std::env::var_os("ZENITH_REPO_DIR") {
match std::env::var_os("NEON_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".zenith"),
None => PathBuf::from(".neon"),
}
}

View File

@@ -149,6 +149,11 @@ impl SafekeeperNode {
if let Some(ref remote_storage) = self.conf.remote_storage {
cmd.args(&["--remote-storage", remote_storage]);
}
if self.conf.auth_enabled {
cmd.arg("--auth-validation-public-key-path");
// PathBuf is better passed as is, not via `String`.
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
}
fill_aws_secrets_vars(&mut cmd);
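
Connecting this to the `auth_enabled` field added to `SafekeeperConf` in the previous file, the overall flow can be pictured with the standalone sketch below (simplified stand-in types, not the real `control_plane` API):

```rust
use std::path::Path;
use std::process::Command;

// Simplified stand-in for control_plane's SafekeeperConf.
struct SafekeeperConf {
    auth_enabled: bool, // parsed from the neon_local config, defaults to false
}

// Simplified stand-in for SafekeeperNode::start(): if auth is enabled, pass the
// public key path to the safekeeper via the new command line argument.
fn safekeeper_command(conf: &SafekeeperConf, base_data_dir: &Path) -> Command {
    let mut cmd = Command::new("safekeeper");
    if conf.auth_enabled {
        cmd.arg("--auth-validation-public-key-path");
        cmd.arg(base_data_dir.join("auth_public_key.pem"));
    }
    cmd
}

fn main() {
    let conf = SafekeeperConf { auth_enabled: true };
    println!("{:?}", safekeeper_command(&conf, Path::new(".neon")));
}
```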

View File

@@ -188,7 +188,7 @@ Not currently committed but proposed:
3. Prefetching
- Why?
As far as pages in Zenith are loaded on demand, to reduce node startup time
and also sppedup some massive queries we need some mechanism for bulk loading to
and also speedup some massive queries we need some mechanism for bulk loading to
reduce page request round-trip overhead.
Currently Postgres is supporting prefetching only for bitmap scan.

View File

@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and
### **Checkpointing**
No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
No such mechanism is needed. Or we may look at the storage node as at kind of continuous checkpointer.
### **Full page writes (torn page protection)**

View File

@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
Alternatively, we could count only relation data. As in pg_database_size().
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
We will need to refactor the timeline_size counter or add another counter to implement it.
We will need to refactor the timeline_size counter or add another counter to implement it.
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
Then this size should be reported to compute node.
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
@@ -64,11 +64,11 @@ We should warn users if the limit is soon to be reached.
### **Reliability, failure modes and corner cases**
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.

View File

@@ -154,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
#### workdir (-D)
A directory in the file system, where pageserver will store its files.
The default is `./.zenith/`.
The default is `./.neon/`.
This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.

View File

@@ -9,6 +9,7 @@
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
once_cell = "1.8.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -1,90 +1,43 @@
//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
//! Intended to connect services to each other, not to store their data.
/// All broker keys, that are used when dealing with etcd.
pub mod subscription_key;
/// All broker values, possible to use when dealing with etcd.
pub mod subscription_value;
use std::{
collections::{hash_map, HashMap},
fmt::Display,
str::FromStr,
};
use regex::{Captures, Regex};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
pub use etcd_client::*;
use serde::de::DeserializeOwned;
use subscription_key::SubscriptionKey;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use utils::{
lsn::Lsn,
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};
use utils::zid::{NodeId, ZTenantTimelineId};
use crate::subscription_key::SubscriptionFullKey;
pub use etcd_client::*;
/// Default value to use for prefixing all etcd keys.
/// This allows isolating safekeeper/pageserver groups in the same etcd cluster.
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
#[derive(Debug, Deserialize, Serialize)]
struct SafekeeperTimeline {
safekeeper_id: NodeId,
info: SkTimelineInfo,
}
/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde(default)]
pub safekeeper_connstr: Option<String>,
#[serde(default)]
pub pageserver_connstr: Option<String>,
}
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd data: {0}")]
ParsingError(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// A way to control the data retrieval from a certain subscription.
pub struct SkTimelineSubscription {
safekeeper_timeline_updates:
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
kind: SkTimelineSubscriptionKind,
pub struct BrokerSubscription<V> {
value_updates: mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>>,
key: SubscriptionKey,
watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
impl SkTimelineSubscription {
impl<V> BrokerSubscription<V> {
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
pub async fn fetch_data(
&mut self,
) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
self.safekeeper_timeline_updates.recv().await
pub async fn fetch_data(&mut self) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>> {
self.value_updates.recv().await
}
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
@@ -92,142 +45,90 @@ impl SkTimelineSubscription {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!(
"Failed to cancel timeline subscription, kind: {:?}",
self.kind
),
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
)
})?;
self.watcher_handle.await.map_err(|e| {
BrokerError::InternalError(format!(
"Failed to join the timeline updates task, kind: {:?}, error: {e}",
self.kind
"Failed to join the broker value updates task, kind: {:?}, error: {e}",
self.key
))
})?
}
}
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SkTimelineSubscriptionKind {
broker_etcd_prefix: String,
kind: SubscriptionKind,
}
impl SkTimelineSubscriptionKind {
pub fn all(broker_etcd_prefix: String) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::All,
}
}
pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Tenant(tenant),
}
}
pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Timeline(timeline),
}
}
fn watch_regex(&self) -> Regex {
match self.kind {
SubscriptionKind::All => Regex::new(&format!(
r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'everything' subscription"),
SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'tenant' subscription"),
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => Regex::new(&format!(
r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'timeline' subscription"),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
match self.kind {
SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
SubscriptionKind::Tenant(tenant_id) => {
format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
}
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => format!(
"{}/{tenant_id}/{timeline_id}/safekeeper",
self.broker_etcd_prefix
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SubscriptionKind {
/// Get every timeline update.
All,
/// Get certain tenant timelines' updates.
Tenant(ZTenantId),
/// Get certain timeline updates.
Timeline(ZTenantTimelineId),
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd key: {0}")]
KeyNotParsed(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// Creates a background task to poll etcd for timeline updates from safekeepers.
/// Stops and returns `Err` on any error during etcd communication.
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
/// exiting normally in such cases.
pub async fn subscribe_to_safekeeper_timeline_updates(
/// Etcd values are parsed as JSON into the type specified in the generic parameter.
pub async fn subscribe_for_json_values<V>(
client: &mut Client,
subscription: SkTimelineSubscriptionKind,
) -> Result<SkTimelineSubscription, BrokerError> {
info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
key: SubscriptionKey,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: DeserializeOwned + Send + 'static,
{
subscribe_for_values(client, key, |_, value_str| {
match serde_json::from_str::<V>(value_str) {
Ok(value) => Some(value),
Err(e) => {
error!("Failed to parse value str '{value_str}': {e}");
None
}
}
})
.await
}
/// Same as [`subscribe_for_json_values`], but allows specifying a custom parser for the etcd value string.
pub async fn subscribe_for_values<P, V>(
client: &mut Client,
key: SubscriptionKey,
value_parser: P,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: Send + 'static,
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
{
info!("Subscribing to broker value updates, key: {key:?}");
let subscription_key = key.clone();
let (watcher, mut stream) = client
.watch(
subscription.watch_key(),
Some(WatchOptions::new().with_prefix()),
)
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
.await
.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to init the watch for subscription {subscription:?}"),
format!("Failed to init the watch for subscription {key:?}"),
)
})?;
let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
let subscription_kind = subscription.kind;
let regex = subscription.watch_regex();
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
let watcher_handle = tokio::spawn(async move {
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
"Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
)))? {
if resp.canceled() {
info!("Watch for timeline updates subscription was canceled, exiting");
break;
}
let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
let mut value_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, V>> = HashMap::new();
// Keep track that the timeline data updates from etcd arrive in the right order.
// https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
// > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
let mut value_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
let events = resp.events();
@@ -238,113 +139,77 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
if let Some(new_etcd_kv) = event.kv() {
let new_kv_version = new_etcd_kv.version();
match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
Ok(Some((zttid, timeline))) => {
match timeline_updates
.entry(zttid)
.or_default()
.entry(timeline.safekeeper_id)
{
hash_map::Entry::Occupied(mut o) => {
let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
if old_etcd_kv_version < new_kv_version {
o.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
Ok(Some((key, value))) => match value_updates
.entry(key.id)
.or_default()
.entry(key.node_id)
{
hash_map::Entry::Occupied(mut o) => {
let old_etcd_kv_version = value_etcd_versions.get(&key.id).copied().unwrap_or(i64::MIN);
if old_etcd_kv_version < new_kv_version {
o.insert(value);
value_etcd_versions.insert(key.id,new_kv_version);
} else {
debug!("Skipping etcd timeline update due to older version compared to one that's already stored");
}
}
}
hash_map::Entry::Vacant(v) => {
v.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
}
}
}
Ok(None) => {}
Err(e) => error!("Failed to parse timeline update: {e}"),
hash_map::Entry::Vacant(v) => {
v.insert(value);
value_etcd_versions.insert(key.id,new_kv_version);
}
},
Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
};
}
}
}
if let Err(e) = timeline_updates_sender.send(timeline_updates) {
info!("Timeline updates sender got dropped, exiting: {e}");
break;
if !value_updates.is_empty() {
if let Err(e) = value_updates_sender.send(value_updates) {
info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
break;
}
}
}
Ok(())
});
}.instrument(info_span!("etcd_broker")));
Ok(SkTimelineSubscription {
kind: subscription,
safekeeper_timeline_updates,
Ok(BrokerSubscription {
key: subscription_key,
value_updates: value_updates_receiver,
watcher_handle,
watcher,
})
}
fn parse_etcd_key_value(
subscription_kind: SubscriptionKind,
regex: &Regex,
fn parse_etcd_kv<P, V>(
kv: &KeyValue,
) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
})?) {
caps
} else {
return Ok(None);
};
let (zttid, safekeeper_id) = match subscription_kind {
SubscriptionKind::All => (
ZTenantTimelineId::new(
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
),
NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Tenant(tenant_id) => (
ZTenantTimelineId::new(
tenant_id,
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
),
NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Timeline(zttid) => (
zttid,
NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
),
};
let info_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
})?;
Ok(Some((
zttid,
SafekeeperTimeline {
safekeeper_id,
info: serde_json::from_str(info_str).map_err(|e| {
BrokerError::ParsingError(format!(
"Failed to parse '{info_str}' as safekeeper timeline info: {e}"
))
})?,
},
)))
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
value_parser: &P,
cluster_prefix: &str,
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
where
T: FromStr,
<T as FromStr>::Err: Display,
P: Fn(SubscriptionFullKey, &str) -> Option<V>,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
let key_str = kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
})?;
let value_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
})?;
if !key_str.starts_with(cluster_prefix) {
return Err(BrokerError::KeyNotParsed(format!(
"KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
)));
}
let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
})?;
Ok(value_parser(key, value_str).map(|value| (key, value)))
}
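
A hedged usage sketch of the new generic subscription API shown above (it assumes the refactored `etcd_broker` crate, the `utils` crate for IDs, `anyhow` for error handling, and an etcd instance on the default port; this is not taken from the diff itself):

```rust
use etcd_broker::{
    subscribe_for_json_values, subscription_key::SubscriptionKey,
    subscription_value::SkTimelineInfo, Client, DEFAULT_NEON_BROKER_ETCD_PREFIX,
};
use utils::zid::ZTenantTimelineId;

async fn watch_timeline(id: ZTenantTimelineId) -> anyhow::Result<()> {
    let mut client = Client::connect(["127.0.0.1:2379"], None).await?;
    // Subscribe to safekeeper timeline_info values for one timeline.
    let key = SubscriptionKey::sk_timeline_info(DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(), id);
    let mut subscription = subscribe_for_json_values::<SkTimelineInfo>(&mut client, key).await?;

    // Each batch maps timeline id -> (safekeeper NodeId -> parsed SkTimelineInfo).
    while let Some(updates) = subscription.fetch_data().await {
        for (timeline, per_safekeeper) in updates {
            println!("{timeline}: updates from {} safekeepers", per_safekeeper.len());
        }
    }
    Ok(())
}
```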

View File

@@ -0,0 +1,310 @@
//! Etcd broker keys, used in the project and shared between instances.
//! The keys are split into two categories:
//!
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
//! Always returned from etcd in this form; always starts with the user key provided.
//!
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
//! Full key always starts with the user input one, due to etcd subscription properties.
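//!
//! For illustration only, reusing the example tenant and timeline IDs from the README
//! earlier in this diff (the trailing node id `1` is hypothetical), a full key published
//! by a safekeeper's `timeline_info` operation would look like:
//! `neon/9ef87a5bf0d92544f6fafeeb3239695c/de200bd42b49cc1814412c7e592dd6e9/safekeeper/timeline_info/1`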
use std::{fmt::Display, str::FromStr};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SubscriptionKey {
/// Generic cluster prefix, allowing multiple logical groups to use the same etcd instance.
pub cluster_prefix: String,
/// The subscription kind.
pub kind: SubscriptionKind,
}
/// All currently possible key kinds of an etcd broker subscription.
/// Etcd works such that every key that starts with the given subscription key is considered matching and
/// returned as part of the subscription.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubscriptionKind {
/// Get every update in etcd.
All,
/// Get etcd updates for any timeline of a certain tenant, affected by any operation from any node kind.
TenantTimelines(ZTenantId),
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
Timeline(ZTenantTimelineId),
/// Get etcd timeline updates, specific to a certain node kind.
Node(ZTenantTimelineId, NodeKind),
/// Get etcd timeline updates for a certain operation on specific nodes.
Operation(ZTenantTimelineId, NodeKind, OperationKind),
}
/// All kinds of nodes, able to write into etcd.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
Safekeeper,
Pageserver,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperationKind {
Safekeeper(SkOperationKind),
}
/// Current operations, running inside the safekeeper node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkOperationKind {
TimelineInfo,
WalBackup,
}
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
.expect("wrong subscription full etcd key regex")
});
/// Full key, received from etcd during any of the component's work.
/// No other etcd keys are considered during system's work.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SubscriptionFullKey {
pub id: ZTenantTimelineId,
pub node_kind: NodeKind,
pub operation: OperationKind,
pub node_id: NodeId,
}
impl SubscriptionKey {
/// Subscribes for all etcd updates.
pub fn all(cluster_prefix: String) -> Self {
SubscriptionKey {
cluster_prefix,
kind: SubscriptionKind::All,
}
}
/// Subscribes to a given timeline info updates from safekeepers.
pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(
timeline,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
),
}
}
/// Subscribes to all timeline updates during specific operations, running on the corresponding nodes.
pub fn operation(
cluster_prefix: String,
timeline: ZTenantTimelineId,
node_kind: NodeKind,
operation: OperationKind,
) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
let cluster_prefix = &self.cluster_prefix;
match self.kind {
SubscriptionKind::All => cluster_prefix.to_string(),
SubscriptionKind::TenantTimelines(tenant_id) => {
format!("{cluster_prefix}/{tenant_id}")
}
SubscriptionKind::Timeline(id) => {
format!("{cluster_prefix}/{id}")
}
SubscriptionKind::Node(id, node_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}")
}
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
}
}
}
}
impl Display for OperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OperationKind::Safekeeper(o) => o.fmt(f),
}
}
}
impl FromStr for OperationKind {
type Err = String;
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
match operation_kind_str {
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
}
}
}
impl Display for SubscriptionFullKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
id,
node_kind,
operation,
node_id,
} = self;
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
}
}
impl FromStr for SubscriptionFullKey {
type Err = String;
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
Some(captures) => captures,
None => {
return Err(format!(
"Subscription kind str does not match a subscription full key regex {}",
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
));
}
};
Ok(Self {
id: ZTenantTimelineId::new(
parse_capture(&key_captures, 1)?,
parse_capture(&key_captures, 2)?,
),
node_kind: parse_capture(&key_captures, 3)?,
operation: parse_capture(&key_captures, 4)?,
node_id: NodeId(parse_capture(&key_captures, 5)?),
})
}
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}
impl Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Safekeeper => write!(f, "safekeeper"),
Self::Pageserver => write!(f, "pageserver"),
}
}
}
impl FromStr for NodeKind {
type Err = String;
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
match node_kind_str {
"safekeeper" => Ok(Self::Safekeeper),
"pageserver" => Ok(Self::Pageserver),
_ => Err(format!("Invalid node kind: {node_kind_str}")),
}
}
}
impl Display for SkOperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TimelineInfo => write!(f, "timeline_info"),
Self::WalBackup => write!(f, "wal_backup"),
}
}
}
impl FromStr for SkOperationKind {
type Err = String;
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
match operation_str {
"timeline_info" => Ok(Self::TimelineInfo),
"wal_backup" => Ok(Self::WalBackup),
_ => Err(format!("Invalid operation: {operation_str}")),
}
}
}
#[cfg(test)]
mod tests {
use utils::zid::ZTimelineId;
use super::*;
#[test]
fn full_cluster_key_parsing() {
let prefix = "neon";
let node_kind = NodeKind::Safekeeper;
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
let tenant_id = ZTenantId::generate();
let timeline_id = ZTimelineId::generate();
let id = ZTenantTimelineId::new(tenant_id, timeline_id);
let node_id = NodeId(1);
let timeline_subscription_keys = [
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::All,
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::TenantTimelines(tenant_id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Timeline(id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Node(id, node_kind),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
},
];
let full_key_string = format!(
"{}/{node_id}",
timeline_subscription_keys.last().unwrap().watch_key()
);
for key in timeline_subscription_keys {
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with each of the keys, but {key:?} did not match");
}
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
});
assert_eq!(
full_key,
SubscriptionFullKey {
id,
node_kind,
operation: operation_kind,
node_id
}
)
}
}

View File

@@ -0,0 +1,35 @@
//! Module for the values to put into etcd.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
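///
/// Stored in etcd as a JSON value; an illustrative example (hypothetical numbers, LSNs in
/// their usual `X/Y` display form):
/// `{"last_log_term":1,"flush_lsn":"0/16F9A00","commit_lsn":"0/16F9A00","backup_lsn":null,"remote_consistent_lsn":null,"peer_horizon_lsn":null,"safekeeper_connstr":"127.0.0.1:5454"}`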
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}

View File

@@ -4,6 +4,7 @@ use log::*;
use postgres::types::PgLsn;
use postgres::Client;
use std::cmp::Ordering;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Instant;
@@ -69,6 +70,12 @@ impl Conf {
pub fn start_server(&self) -> Result<PostgresServer> {
info!("Starting Postgres server in {:?}", self.datadir);
let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
format!(
"Failed to create pg.log file in directory {}",
self.datadir.display()
)
})?;
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
let server_process = self
@@ -84,7 +91,7 @@ impl Conf {
// Disable background processes as much as possible
.args(&["-c", "wal_writer_delay=10s"])
.args(&["-c", "autovacuum=off"])
.stderr(Stdio::null())
.stderr(Stdio::from(log_file))
.spawn()?;
let server = PostgresServer {
process: server_process,

View File

@@ -13,13 +13,10 @@ use std::fmt;
use std::io::{self, Write};
use std::net::{Shutdown, SocketAddr, TcpStream};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tracing::*;
static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
pub trait Handler {
/// Handle single query.
/// postgres_backend will issue ReadyForQuery after calling this (this
@@ -45,6 +42,10 @@ pub trait Handler {
fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
bail!("JWT auth failed")
}
fn is_shutdown_requested(&self) -> bool {
false
}
}
/// PostgresBackend protocol state.
@@ -274,7 +275,7 @@ impl PostgresBackend {
let mut unnamed_query_string = Bytes::new();
while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
while !handler.is_shutdown_requested() {
match self.read_message() {
Ok(message) => {
if let Some(msg) = message {
@@ -493,8 +494,3 @@ impl PostgresBackend {
Ok(ProcessMsgResult::Continue)
}
}
// Set the flag to inform connections to cancel
pub fn set_pgbackend_shutdown_requested() {
PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
}
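
The replacement design in one self-contained sketch (simplified stand-ins for `Handler` and `PageServerHandler`; in the real code the pageserver handler consults `thread_mgr`, as the page_service diff further down shows):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Simplified stand-in for postgres_backend::Handler.
trait Handler {
    fn is_shutdown_requested(&self) -> bool {
        false // default: keep serving
    }
}

// Simplified stand-in for PageServerHandler.
struct PageServerHandler<'a> {
    thread_shutdown: &'a AtomicBool,
}

impl Handler for PageServerHandler<'_> {
    fn is_shutdown_requested(&self) -> bool {
        self.thread_shutdown.load(Ordering::Relaxed)
    }
}

// The backend's message loop now asks the handler instead of a global flag.
fn run(handler: &dyn Handler) {
    while !handler.is_shutdown_requested() {
        // read_message() / process_query() would run here.
        break; // keep the sketch finite
    }
}

fn main() {
    let shutdown = AtomicBool::new(false);
    run(&PageServerHandler { thread_shutdown: &shutdown });
}
```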

View File

@@ -269,15 +269,18 @@ impl FeStartupPacket {
.next()
.context("expected even number of params in StartupMessage")?;
if name == "options" {
//parsing options arguments "..&options=<var>:<val>,.."
//extended example and set of options:
//https://github.com/neondatabase/neon/blob/main/docs/rfcs/016-connection-routing.md#connection-url
for cmdopt in value.split(',') {
let nameval: Vec<&str> = cmdopt.split(':').collect();
// parsing options arguments "...&options=<var0>%3D<val0>+<var1>%3D<val1>..."
// '%3D' is '=' and '+' is ' '
// Note: we allow users that don't have SNI capabilities,
// to pass a special keyword argument 'project'
// to be used to determine the cluster name by the proxy.
// TODO: write a unit test for this and refactor it into its own function.
for cmdopt in value.split(' ') {
let nameval: Vec<&str> = cmdopt.split('=').collect();
if nameval.len() == 2 {
params.insert(nameval[0].to_string(), nameval[1].to_string());
} else {
// TODO: inform the user / return an error message if the options format is wrong.
}
}
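// For example (illustrative values): a connection whose URL carried
// "...&options=project%3Dmyproject" reaches this point, after the decoding described
// above, with value = "project=myproject"; the loop then stores
// params["project"] = "myproject", which the proxy can use to pick the cluster.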
} else {
@@ -923,10 +926,10 @@ impl<'a> BeMessage<'a> {
}
}
// Zenith extension of postgres replication protocol
// See ZENITH_STATUS_UPDATE_TAG_BYTE
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ZenithFeedback {
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,
// Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -936,13 +939,13 @@ pub struct ZenithFeedback {
pub ps_replytime: SystemTime,
}
// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
// Do not remove previously available fields because this might be backwards incompatible.
pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;
impl ZenithFeedback {
pub fn empty() -> ZenithFeedback {
ZenithFeedback {
impl ReplicationFeedback {
pub fn empty() -> ReplicationFeedback {
ReplicationFeedback {
current_timeline_size: 0,
ps_writelsn: 0,
ps_applylsn: 0,
@@ -951,7 +954,7 @@ impl ZenithFeedback {
}
}
// Serialize ZenithFeedback using custom format
// Serialize ReplicationFeedback using custom format
// to support protocol extensibility.
//
// Following layout is used:
@@ -962,7 +965,7 @@ impl ZenithFeedback {
// uint32 - value length in bytes
// value itself
pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
write_cstr(&Bytes::from("current_timeline_size"), buf)?;
buf.put_i32(8);
buf.put_u64(self.current_timeline_size);
@@ -989,9 +992,9 @@ impl ZenithFeedback {
Ok(())
}
// Deserialize ZenithFeedback message
pub fn parse(mut buf: Bytes) -> ZenithFeedback {
let mut zf = ZenithFeedback::empty();
// Deserialize ReplicationFeedback message
pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
let mut zf = ReplicationFeedback::empty();
let nfields = buf.get_u8();
let mut i = 0;
while i < nfields {
@@ -1032,14 +1035,14 @@ impl ZenithFeedback {
_ => {
let len = buf.get_i32();
warn!(
"ZenithFeedback parse. unknown key {} of len {}. Skip it.",
"ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
key, len
);
buf.advance(len as usize);
}
}
}
trace!("ZenithFeedback parsed is {:?}", zf);
trace!("ReplicationFeedback parsed is {:?}", zf);
zf
}
}
@@ -1049,8 +1052,8 @@ mod tests {
use super::*;
#[test]
fn test_zenithfeedback_serialization() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_serialization() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1059,13 +1062,13 @@ mod tests {
let mut data = BytesMut::new();
zf.serialize(&mut data).unwrap();
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
#[test]
fn test_zenithfeedback_unknown_key() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_unknown_key() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1076,7 +1079,7 @@ mod tests {
// Add an extra field to the buffer and adjust number of keys
if let Some(first) = data.first_mut() {
*first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
*first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
}
write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1084,7 +1087,7 @@ mod tests {
data.put_u64(42);
// Parse serialized data and check that new field is not parsed
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
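
For reference, the custom feedback format exercised by these tests is a simple tag-length-value layout (field count, then per field a null-terminated name, an i32 length, and the raw value). A standalone sketch of encoding just the `current_timeline_size` field, using the `bytes` crate like the real code (an illustration, not the actual `ReplicationFeedback::serialize`):

```rust
use bytes::{BufMut, BytesMut};

// Standalone illustration of the layout described above ReplicationFeedback::serialize:
// u8 number of fields, then for each field a null-terminated key name,
// an i32 value length, and the value bytes themselves.
fn encode_single_field(current_timeline_size: u64) -> BytesMut {
    let mut buf = BytesMut::new();
    buf.put_u8(1); // number of keys in this sketch
    buf.put_slice(b"current_timeline_size\0"); // write_cstr equivalent
    buf.put_i32(8); // value length in bytes
    buf.put_u64(current_timeline_size);
    buf
}

fn main() {
    let buf = encode_single_field(12_345_678);
    // 1 (count) + 22 (name + NUL) + 4 (length) + 8 (value) bytes
    assert_eq!(buf.len(), 35);
    println!("{} bytes", buf.len());
}
```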

View File

@@ -69,7 +69,7 @@ Repository
The repository stores all the page versions, or WAL records needed to
reconstruct them. Each tenant has a separate Repository, which is
stored in the .zenith/tenants/<tenantid> directory.
stored in the .neon/tenants/<tenantid> directory.
Repository is an abstract trait, defined in `repository.rs`. It is
implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
records, whenever we need to reconstruct a page version from WAL to
satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
for a page. The WAL redo manager uses a Postgres process running in
special zenith wal-redo mode to do the actual WAL redo, and
special Neon wal-redo mode to do the actual WAL redo, and
communicates with the process using a pipe.

View File

@@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
let workdir = workdir
.canonicalize()
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;

View File

@@ -4,7 +4,7 @@
//! The functions here are responsible for locating the correct layer for the
//! get/put call, tracing timeline branching history as needed.
//!
//! The files are stored in the .zenith/tenants/<tenantid>/timelines/<timelineid>
//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
//! directory. See layered_repository/README for how the files are managed.
//! In addition to the layer files, there is a metadata file in the same
//! directory that contains information about the timeline, in particular its
@@ -148,7 +148,7 @@ lazy_static! {
.expect("failed to define a metric");
}
/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
/// Parts of the `.neon/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
///
@@ -1727,9 +1727,7 @@ impl LayeredTimeline {
new_delta_path.clone(),
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
])?;
fail_point!("checkpoint-before-sync");
fail_point!("flush-frozen");
fail_point!("flush-frozen-before-sync");
// Finally, replace the frozen in-memory layer with the new on-disk layer
{

View File

@@ -123,7 +123,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
a range of LSNs (or a single LSN, in case of image layers). You can think of it
as a rectangle in the two-dimensional key-LSN space. The layer files for each
timeline are stored in the timeline's subdirectory under
`.zenith/tenants/<tenantid>/timelines`.
`.neon/tenants/<tenantid>/timelines`.
There are two kind of layer files: images, and delta layers. An image file
contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -178,7 +178,7 @@ version, and how branching and GC works is still valid.
The full path of a delta file looks like this:
```
.zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
.neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
```
For simplicity, the examples below use a simplified notation for the
@@ -409,7 +409,7 @@ removed because there is no newer layer file for the table.
Things get slightly more complicated with multiple branches. All of
the above still holds, but in addition to recent files we must also
retain older shapshot files that are still needed by child branches.
retain older snapshot files that are still needed by child branches.
For example, if child branch is created at LSN 150, and the 'customers'
table is updated on the branch, you would have these files:

View File

@@ -24,7 +24,6 @@ pub mod walredo;
use lazy_static::lazy_static;
use tracing::info;
use utils::postgres_backend;
use crate::thread_mgr::ThreadKind;
use metrics::{register_int_gauge_vec, IntGaugeVec};
@@ -73,7 +72,6 @@ pub fn shutdown_pageserver(exit_code: i32) {
thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);
// Shut down any page service threads.
postgres_backend::set_pgbackend_shutdown_requested();
thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);
// Shut down all the tenants. This flushes everything to disk and kills

View File

@@ -20,7 +20,7 @@
//! assign a buffer for a page, you must hold the mapping lock and the lock on
//! the slot at the same time.
//!
//! Whenever you need to hold both locks simultenously, the slot lock must be
//! Whenever you need to hold both locks simultaneously, the slot lock must be
//! acquired first. This consistent ordering avoids deadlocks. To look up a page
//! in the cache, you would first look up the mapping, while holding the mapping
//! lock, and then lock the slot. You must release the mapping lock in between,

View File

@@ -370,6 +370,10 @@ impl PageServerHandler {
) -> anyhow::Result<()> {
let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
// NOTE: pagerequests handler exits when connection is closed,
// so there is no need to reset the association
thread_mgr::associate_with(Some(tenantid), Some(timelineid));
// Check that the timeline exists
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
.context("Cannot load local timeline")?;
@@ -672,6 +676,10 @@ impl postgres_backend::Handler for PageServerHandler {
Ok(())
}
fn is_shutdown_requested(&self) -> bool {
thread_mgr::is_shutdown_requested()
}
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
@@ -802,7 +810,6 @@ impl postgres_backend::Handler for PageServerHandler {
.map(|h| h.as_str().parse())
.unwrap_or_else(|| Ok(repo.get_gc_horizon()))?;
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// Use tenant's pitr setting
let pitr = repo.get_pitr_interval();
let result = repo.gc_iteration(Some(timelineid), gc_horizon, pitr, true)?;

View File

@@ -1,223 +0,0 @@
//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use tracing::{debug, error, info};
use utils::zid::ZTenantTimelineId;
use crate::remote_storage::{
storage_sync::{SyncQueue, SyncTask},
RemoteStorage,
};
use super::{LayersDeletion, SyncData};
/// Attempts to remove the timeline layers from the remote storage.
/// If the task had not adjusted the metadata before, the deletion will fail.
pub(super) async fn delete_timeline_layers<'a, P, S>(
storage: &'a S,
sync_queue: &SyncQueue,
sync_id: ZTenantTimelineId,
mut delete_data: SyncData<LayersDeletion>,
) -> bool
where
P: std::fmt::Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
if !delete_data.data.deletion_registered {
error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
return false;
}
if delete_data.data.layers_to_delete.is_empty() {
info!("No layers to delete, skipping");
return true;
}
let layers_to_delete = delete_data
.data
.layers_to_delete
.drain()
.collect::<Vec<_>>();
debug!("Layers to delete: {layers_to_delete:?}");
info!("Deleting {} timeline layers", layers_to_delete.len());
let mut delete_tasks = layers_to_delete
.into_iter()
.map(|local_layer_path| async {
let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
local_layer_path.display()
)
}) {
Ok(path) => path,
Err(e) => return Err((e, local_layer_path)),
};
match storage.delete(&storage_path).await.with_context(|| {
format!(
"Failed to delete remote layer from storage at '{:?}'",
storage_path
)
}) {
Ok(()) => Ok(local_layer_path),
Err(e) => Err((e, local_layer_path)),
}
})
.collect::<FuturesUnordered<_>>();
let mut errored = false;
while let Some(deletion_result) = delete_tasks.next().await {
match deletion_result {
Ok(local_layer_path) => {
debug!(
"Successfully deleted layer {} for timeline {sync_id}",
local_layer_path.display()
);
delete_data.data.deleted_layers.insert(local_layer_path);
}
Err((e, local_layer_path)) => {
errored = true;
error!(
"Failed to delete layer {} for timeline {sync_id}: {e:?}",
local_layer_path.display()
);
delete_data.data.layers_to_delete.insert(local_layer_path);
}
}
}
if errored {
debug!("Reenqueuing failed delete task for timeline {sync_id}");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
}
errored
}
#[cfg(test)]
mod tests {
use std::{collections::HashSet, num::NonZeroUsize};
use itertools::Itertools;
use tempfile::tempdir;
use tokio::fs;
use utils::lsn::Lsn;
use crate::{
remote_storage::{
storage_sync::test_utils::{create_local_timeline, dummy_metadata},
LocalFs,
},
repository::repo_harness::{RepoHarness, TIMELINE_ID},
};
use super::*;
#[tokio::test]
async fn delete_timeline_negative() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline_negative")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: 1,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::new(),
deletion_registered: false,
},
},
)
.await;
assert!(
!deleted,
"Should not start the deletion for task with delete metadata unregistered"
);
Ok(())
}
#[tokio::test]
async fn delete_timeline() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let layer_files = ["a", "b", "c", "d"];
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let current_retries = 3;
let metadata = dummy_metadata(Lsn(0x30));
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
let timeline_upload =
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
for local_path in timeline_upload.layers_to_upload {
let remote_path = storage.storage_path(&local_path)?;
let remote_parent_dir = remote_path.parent().unwrap();
if !remote_parent_dir.exists() {
fs::create_dir_all(&remote_parent_dir).await?;
}
fs::copy(&local_path, &remote_path).await?;
}
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
layer_files
.iter()
.map(|layer_str| layer_str.to_string())
.sorted()
.collect::<Vec<_>>(),
"Expect to have all layer files remotely before deletion"
);
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: current_retries,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::from([
local_timeline_path.join("a"),
local_timeline_path.join("c"),
local_timeline_path.join("something_different"),
]),
deletion_registered: true,
},
},
)
.await;
assert!(deleted, "Should be able to delete timeline files");
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
vec!["b".to_string(), "d".to_string()],
"Expect to have only non-deleted files remotely"
);
Ok(())
}
}

View File

@@ -195,8 +195,9 @@ impl Display for TimelineSyncStatusUpdate {
f.write_str(s)
}
}
///
/// A repository corresponds to one .zenith directory. One repository holds multiple
/// A repository corresponds to one .neon directory. One repository holds multiple
/// timelines, forked off from the same initial call to 'initdb'.
pub trait Repository: Send + Sync {
type Timeline: Timeline;
@@ -242,7 +243,7 @@ pub trait Repository: Send + Sync {
///
/// 'timelineid' specifies the timeline to GC, or None for all.
/// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
/// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
/// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
/// to make tests more deterministic.
/// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
fn gc_iteration(

View File

@@ -186,8 +186,8 @@ use crate::{
};
use metrics::{
register_histogram_vec, register_int_counter, register_int_gauge, HistogramVec, IntCounter,
IntGauge,
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
@@ -208,14 +208,17 @@ lazy_static! {
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
"pageserver_remote_storage_image_sync_seconds",
"Time took to synchronize (download or upload) a whole pageserver image. \
Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
&["operation_kind", "status"],
vec![
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
]
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
&["tenant_id", "timeline_id", "operation_kind", "status"],
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
)
.expect("failed to register pageserver image sync time histogram vec");
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_remote_index_uploads_total",
"Number of remote index uploads",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver remote index upload vec");
}
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
@@ -892,7 +895,7 @@ fn storage_sync_loop<P, S>(
REMAINING_SYNC_ITEMS.set(remaining_queue_length as i64);
if remaining_queue_length > 0 || !batched_tasks.is_empty() {
info!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
debug!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
} else {
debug!("No tasks to process");
continue;
@@ -1146,19 +1149,19 @@ where
.await
{
DownloadedTimeline::Abort => {
register_sync_status(sync_start, task_name, None);
register_sync_status(sync_id, sync_start, task_name, None);
if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
}
}
DownloadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
DownloadedTimeline::Successful(mut download_data) => {
match update_local_metadata(conf, sync_id, current_remote_timeline).await {
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
register_sync_status(sync_id, sync_start, task_name, Some(true));
return Some(TimelineSyncStatusUpdate::Downloaded);
}
Err(e) => {
@@ -1169,7 +1172,7 @@ where
error!("Failed to update local timeline metadata: {e:?}");
download_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Download(download_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
@@ -1186,7 +1189,7 @@ async fn update_local_metadata(
let remote_metadata = match remote_timeline {
Some(timeline) => &timeline.metadata,
None => {
info!("No remote timeline to update local metadata from, skipping the update");
debug!("No remote timeline to update local metadata from, skipping the update");
return Ok(());
}
};
@@ -1265,14 +1268,14 @@ async fn delete_timeline_data<P, S>(
error!("Failed to update remote timeline {sync_id}: {e:?}");
new_delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
}
timeline_delete.deletion_registered = true;
let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
register_sync_status(sync_start, task_name, Some(sync_status));
register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
}
async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1306,7 +1309,7 @@ async fn upload_timeline_data<P, S>(
.await
{
UploadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1325,13 +1328,13 @@ async fn upload_timeline_data<P, S>(
.await
{
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
register_sync_status(sync_id, sync_start, task_name, Some(true));
}
Err(e) => {
error!("Failed to update remote timeline {sync_id}: {e:?}");
uploaded_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
@@ -1421,7 +1424,14 @@ where
IndexPart::from_remote_timeline(&timeline_path, updated_remote_timeline)
.context("Failed to create an index part from the updated remote timeline")?;
info!("Uploading remote index for the timeline");
debug!("Uploading remote index for the timeline");
REMOTE_INDEX_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
upload_index_part(conf, storage, sync_id, new_index_part)
.await
.context("Failed to upload new index part")
@@ -1590,12 +1600,24 @@ fn compare_local_and_remote_timeline(
(initial_timeline_status, awaits_download)
}
fn register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option<bool>) {
fn register_sync_status(
sync_id: ZTenantTimelineId,
sync_start: Instant,
sync_name: &str,
sync_status: Option<bool>,
) {
let secs_elapsed = sync_start.elapsed().as_secs_f64();
info!("Processed a sync task in {secs_elapsed:.2} seconds");
debug!("Processed a sync task in {secs_elapsed:.2} seconds");
let tenant_id = sync_id.tenant_id.to_string();
let timeline_id = sync_id.timeline_id.to_string();
match sync_status {
Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]),
Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]),
Some(true) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
}
Some(false) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
}
None => return,
}
.observe(secs_elapsed)
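For reference, a minimal sketch (not part of the diff) of how the relabelled histogram is exercised, mirroring register_sync_status above; the tenant and timeline ids are placeholders:
// Record a successful 0.42-second upload for one tenant/timeline.
IMAGE_SYNC_TIME
    .with_label_values(&["<tenant_id>", "<timeline_id>", "upload", "success"])
    .observe(0.42);
// With the standard Prometheus exposition this produces per-tenant series such as
// pageserver_remote_storage_image_sync_seconds_bucket{tenant_id="...",timeline_id="...",operation_kind="upload",status="success",le="0.5"},
// so sync latency can now be broken down by tenant and timeline.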

View File

@@ -4,6 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use lazy_static::lazy_static;
use remote_storage::RemoteStorage;
use tokio::fs;
use tracing::{debug, error, info, warn};
@@ -17,6 +18,16 @@ use super::{
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use metrics::{register_int_counter_vec, IntCounterVec};
lazy_static! {
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_no_layers_uploads_total",
"Number of skipped uploads due to no layers",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver no layers upload vec");
}
/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<P, S>(
@@ -102,7 +113,13 @@ where
.collect::<Vec<_>>();
if layers_to_upload.is_empty() {
info!("No layers to upload after filtering, aborting");
debug!("No layers to upload after filtering, aborting");
NO_LAYERS_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
return UploadedTimeline::Successful(upload_data);
}

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1_000_000;
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
}
/// Per-tenant configuration options

View File

@@ -108,15 +108,21 @@ pub enum ThreadKind {
StorageSync,
}
struct MutableThreadState {
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
/// Handle for waiting for the thread to exit. It can be None if the
/// thread has already exited.
join_handle: Option<JoinHandle<()>>,
}
struct PageServerThread {
_thread_id: u64,
kind: ThreadKind,
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
name: String,
// To request thread shutdown, set the flag, and send a dummy message to the
@@ -124,9 +130,7 @@ struct PageServerThread {
shutdown_requested: AtomicBool,
shutdown_tx: watch::Sender<()>,
/// Handle for waiting for the thread to exit. It can be None, if the
/// the thread has already exited.
join_handle: Mutex<Option<JoinHandle<()>>>,
mutable: Mutex<MutableThreadState>,
}
/// Launch a new thread
@@ -145,29 +149,27 @@ where
{
let (shutdown_tx, shutdown_rx) = watch::channel(());
let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
let thread = PageServerThread {
let thread = Arc::new(PageServerThread {
_thread_id: thread_id,
kind,
tenant_id,
timeline_id,
name: name.to_string(),
shutdown_requested: AtomicBool::new(false),
shutdown_tx,
join_handle: Mutex::new(None),
};
let thread_rc = Arc::new(thread);
let mut jh_guard = thread_rc.join_handle.lock().unwrap();
mutable: Mutex::new(MutableThreadState {
tenant_id,
timeline_id,
join_handle: None,
}),
});
THREADS
.lock()
.unwrap()
.insert(thread_id, Arc::clone(&thread_rc));
.insert(thread_id, Arc::clone(&thread));
let thread_rc2 = Arc::clone(&thread_rc);
let mut thread_mut = thread.mutable.lock().unwrap();
let thread_cloned = Arc::clone(&thread);
let thread_name = name.to_string();
let join_handle = match thread::Builder::new()
.name(name.to_string())
@@ -175,7 +177,7 @@ where
thread_wrapper(
thread_name,
thread_id,
thread_rc2,
thread_cloned,
shutdown_rx,
shutdown_process_on_error,
f,
@@ -189,8 +191,8 @@ where
return Err(err);
}
};
*jh_guard = Some(join_handle);
drop(jh_guard);
thread_mut.join_handle = Some(join_handle);
drop(thread_mut);
// The thread is now running. Nothing more to do here
Ok(thread_id)
@@ -229,19 +231,20 @@ fn thread_wrapper<F>(
.remove(&thread_id)
.expect("no thread in registry");
let thread_mut = thread.mutable.lock().unwrap();
match result {
Ok(Ok(())) => debug!("Thread '{}' exited normally", thread_name),
Ok(Err(err)) => {
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
@@ -249,19 +252,29 @@ fn thread_wrapper<F>(
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
}
}
// Expected to be called from within the thread itself: it updates the calling thread's tenant/timeline association.
pub fn associate_with(tenant_id: Option<ZTenantId>, timeline_id: Option<ZTimelineId>) {
CURRENT_THREAD.with(|ct| {
let borrowed = ct.borrow();
let mut thread_mut = borrowed.as_ref().unwrap().mutable.lock().unwrap();
thread_mut.tenant_id = tenant_id;
thread_mut.timeline_id = timeline_id;
});
}
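For illustration (not part of the diff), a minimal sketch of how a serving thread uses the new hook, matching the page-service call shown at the top of this compare view; the shutdown_threads argument order is assumed from the filter in its body below:
// Inside the thread, once it knows which tenant/timeline it serves:
thread_mgr::associate_with(Some(tenantid), Some(timelineid));
// A later per-tenant shutdown can then join exactly those threads
// (argument order assumed: kind, tenant_id, timeline_id):
thread_mgr::shutdown_threads(None, Some(tenantid), None);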
/// Is there a thread running that matches the criteria
/// Signal and wait for threads to shut down.
@@ -285,9 +298,10 @@ pub fn shutdown_threads(
let threads = THREADS.lock().unwrap();
for thread in threads.values() {
let thread_mut = thread.mutable.lock().unwrap();
if (kind.is_none() || Some(thread.kind) == kind)
&& (tenant_id.is_none() || thread.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread.timeline_id == timeline_id)
&& (tenant_id.is_none() || thread_mut.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread_mut.timeline_id == timeline_id)
{
thread.shutdown_requested.store(true, Ordering::Relaxed);
// FIXME: handle error?
@@ -298,8 +312,10 @@ pub fn shutdown_threads(
drop(threads);
for thread in victim_threads {
let mut thread_mut = thread.mutable.lock().unwrap();
info!("waiting for {} to shut down", thread.name);
if let Some(join_handle) = thread.join_handle.lock().unwrap().take() {
if let Some(join_handle) = thread_mut.join_handle.take() {
drop(thread_mut);
let _ = join_handle.join();
} else {
// The thread had not even fully started yet. Or it was shut down

View File

@@ -50,7 +50,10 @@ use crate::thread_mgr::ThreadKind;
use crate::{thread_mgr, DatadirTimelineImpl};
use anyhow::{ensure, Context};
use chrono::{NaiveDateTime, Utc};
use etcd_broker::{Client, SkTimelineInfo, SkTimelineSubscription, SkTimelineSubscriptionKind};
use etcd_broker::{
subscription_key::SubscriptionKey, subscription_value::SkTimelineInfo, BrokerSubscription,
Client,
};
use itertools::Itertools;
use once_cell::sync::Lazy;
use std::cell::Cell;
@@ -68,7 +71,7 @@ use tokio::{
use tracing::*;
use url::Url;
use utils::lsn::Lsn;
use utils::pq_proto::ZenithFeedback;
use utils::pq_proto::ReplicationFeedback;
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
use self::connection_handler::{WalConnectionEvent, WalReceiverConnection};
@@ -403,7 +406,7 @@ async fn timeline_wal_broker_loop_step(
// Endlessly try to subscribe for broker updates for a given timeline.
// If there are no safekeepers to maintain the lease, the timeline subscription will be unavailable in the broker and the operation will fail constantly.
// This is ok: pageservers should keep trying to subscribe (with some backoff), since it's the only way they can get the timeline WAL.
let mut broker_subscription: SkTimelineSubscription;
let mut broker_subscription: BrokerSubscription<SkTimelineInfo>;
let mut attempt = 0;
loop {
select! {
@@ -420,9 +423,9 @@ async fn timeline_wal_broker_loop_step(
info!("Broker subscription loop cancelled, shutting down");
return Ok(ControlFlow::Break(()));
},
new_subscription = etcd_broker::subscribe_to_safekeeper_timeline_updates(
new_subscription = etcd_broker::subscribe_for_json_values(
etcd_client,
SkTimelineSubscriptionKind::timeline(broker_prefix.to_owned(), id),
SubscriptionKey::sk_timeline_info(broker_prefix.to_owned(), id),
)
.instrument(info_span!("etcd_subscription")) => match new_subscription {
Ok(new_subscription) => {
@@ -468,14 +471,21 @@ async fn timeline_wal_broker_loop_step(
// finally, if no other tasks are completed, get another broker update and possibly reconnect
updates = broker_subscription.fetch_data() => match updates {
Some(mut all_timeline_updates) => {
if let Some(subscribed_timeline_updates) = all_timeline_updates.remove(&id) {
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
Some(candidate) => {
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
},
None => {}
match all_timeline_updates.remove(&id) {
Some(subscribed_timeline_updates) => {
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
Some(candidate) => {
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
},
None => debug!("No connection candidate was selected for timeline"),
}
}
// XXX: If we subscribe for a certain timeline, we expect only its data to come.
// But somebody could propagate a new etcd key that has the same prefix as the subscribed one, and then we'll get odd data.
// This is an error, since we don't want overlapping prefixes for timelines, but we can complain and throw those entries away instead of panicking,
// since the next poll might bring the correct data.
None => error!("Timeline has an active broker subscription, but got no updates. Other data length: {}", all_timeline_updates.len()),
}
},
None => {
@@ -511,7 +521,7 @@ struct WalConnectionData {
safekeeper_id: NodeId,
connection: WalReceiverConnection,
connection_init_time: NaiveDateTime,
last_wal_receiver_data: Option<(ZenithFeedback, NaiveDateTime)>,
last_wal_receiver_data: Option<(ReplicationFeedback, NaiveDateTime)>,
}
#[derive(Debug, PartialEq, Eq)]
@@ -622,18 +632,28 @@ impl WalConnectionManager {
/// Checks current state against every fetched safekeeper state of a given timeline.
/// Returns a new candidate, if the current state is somewhat lagging, or `None` otherwise.
/// The current rules for approving new candidates:
/// * pick the safekeeper with biggest `commit_lsn` that's after than pageserver's latest Lsn for the timeline
/// * if the leader is a different SK and either
/// * no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection) — reconnect
/// * same time amount had passed since the connection, WAL updates happened recently, but the new leader SK has timeline Lsn way ahead of the old one — reconnect
/// * pick the etcd entry for the currently connected safekeeper (if any) out of the input data
/// * out of the remaining entries, pick the one with the biggest `commit_lsn` that is ahead of the pageserver's latest Lsn for the timeline
/// * if there's no such entry, no new candidate is found, abort
/// * otherwise, check whether the etcd updates contain the currently connected safekeeper
/// * if they don't, no WAL updates have arrived for a while (either none since the connection was made, or none since the last event on it);
/// reconnect if that time exceeds the threshold
/// * if they do, compare its Lsn with the candidate's and reconnect if the candidate is ahead by more than the threshold
///
/// This way we keep up with the most up-to-date safekeeper without jumping from one safekeeper to another too frequently.
/// Both thresholds are configured per tenant (a worked numeric example of the lag threshold follows the select_connection_candidate hunks below).
fn select_connection_candidate(
&self,
safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
mut safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
) -> Option<NewWalConnectionCandidate> {
let (&new_sk_id, new_sk_timeline, new_wal_producer_connstr) = safekeeper_timelines
let current_sk_data_updated =
self.wal_connection_data
.as_ref()
.and_then(|connection_data| {
safekeeper_timelines.remove(&connection_data.safekeeper_id)
});
let candidate_sk_data = safekeeper_timelines
.iter()
.filter(|(_, info)| {
info.commit_lsn > Some(self.timeline.tline.get_last_record_lsn())
@@ -642,7 +662,6 @@ impl WalConnectionManager {
match wal_stream_connection_string(
self.id,
info.safekeeper_connstr.as_deref()?,
info.pageserver_connstr.as_deref()?,
) {
Ok(connstr) => Some((sk_id, info, connstr)),
Err(e) => {
@@ -651,68 +670,78 @@ impl WalConnectionManager {
}
}
})
.max_by_key(|(_, info, _)| info.commit_lsn)?;
.max_by_key(|(_, info, _)| info.commit_lsn);
match self.wal_connection_data.as_ref() {
None => Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoExistingConnection,
}),
Some(current_connection) => {
if current_connection.safekeeper_id == new_sk_id {
None
} else {
self.reason_to_reconnect(current_connection, new_sk_timeline)
.map(|reason| NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason,
})
match (current_sk_data_updated, candidate_sk_data) {
// No better candidate than one we're already connected to:
// whatever data update comes for the connected one, we don't have a better candidate
(_, None) => None,
// No updates from the old SK in this batch, but some candidate is available:
// check how long time ago did we receive updates from the current SK, switch connections in case it's over the threshold
(None, Some((&new_sk_id, _, new_wal_producer_connstr))) => {
match self.wal_connection_data.as_ref() {
Some(current_connection) => {
let last_sk_interaction_time =
match current_connection.last_wal_receiver_data.as_ref() {
Some((_, data_submission_time)) => *data_submission_time,
None => current_connection.connection_init_time,
};
let now = Utc::now().naive_utc();
match (now - last_sk_interaction_time).to_std() {
Ok(last_interaction) => {
if last_interaction > self.lagging_wal_timeout {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoWalTimeout {
last_wal_interaction: last_sk_interaction_time,
check_time: now,
threshold: self.lagging_wal_timeout,
},
});
}
}
Err(_e) => {
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}", current_connection.safekeeper_id);
}
}
None
}
None => Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoExistingConnection,
}),
}
}
}
}
fn reason_to_reconnect(
&self,
current_connection: &WalConnectionData,
new_sk_timeline: &SkTimelineInfo,
) -> Option<ReconnectReason> {
let last_sk_interaction_time = match current_connection.last_wal_receiver_data.as_ref() {
Some((last_wal_receiver_data, data_submission_time)) => {
let new_lsn = new_sk_timeline.commit_lsn?;
match new_lsn.0.checked_sub(last_wal_receiver_data.ps_writelsn)
// Both current SK got updated via etcd and there's another candidate with suitable Lsn:
// check how bigger the new SK Lsn is in the future compared to the current SK, switch connections in case it's over the threshold
(
Some(current_sk_timeline),
Some((&new_sk_id, new_sk_timeline, new_wal_producer_connstr)),
) => {
let new_lsn = new_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
let current_lsn = current_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
match new_lsn.0.checked_sub(current_lsn.0)
{
Some(sk_lsn_advantage) => {
if sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(ReconnectReason::LaggingWal { current_lsn: Lsn(last_wal_receiver_data.ps_writelsn), new_lsn, threshold: self.max_lsn_wal_lag });
Some(new_sk_lsn_advantage) => {
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(
NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
});
}
}
None => debug!("Best SK candidate has its commit Lsn behind the current timeline's latest consistent Lsn"),
}
*data_submission_time
}
None => current_connection.connection_init_time,
};
let now = Utc::now().naive_utc();
match (now - last_sk_interaction_time).to_std() {
Ok(last_interaction) => {
if last_interaction > self.lagging_wal_timeout {
return Some(ReconnectReason::NoWalTimeout {
last_wal_interaction: last_sk_interaction_time,
check_time: now,
threshold: self.lagging_wal_timeout,
});
}
}
Err(_e) => {
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}",
current_connection.safekeeper_id);
None
}
}
None
}
}
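A worked numeric example (not from the diff) of the LaggingWal rule documented above, assuming the new DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG of 10_000 from the config change earlier in this compare view; the Lsn values are illustrative:
// The connected safekeeper last reported commit_lsn = 100_000 via etcd,
// while the best other candidate reports 120_000.
let current_lsn = Lsn(100_000);
let new_lsn = Lsn(120_000);
let max_lsn_wal_lag: u64 = 10_000; // DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG
// Mirrors the checked_sub comparison in select_connection_candidate:
let advantage = new_lsn.0.checked_sub(current_lsn.0); // Some(20_000)
assert!(matches!(advantage, Some(adv) if adv >= max_lsn_wal_lag)); // -> ReconnectReason::LaggingWal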
@@ -722,7 +751,6 @@ fn wal_stream_connection_string(
timeline_id,
}: ZTenantTimelineId,
listen_pg_addr_str: &str,
pageserver_connstr: &str,
) -> anyhow::Result<String> {
let sk_connstr = format!("postgresql://no_user@{listen_pg_addr_str}/no_db");
let me_conf = sk_connstr
@@ -732,7 +760,7 @@ fn wal_stream_connection_string(
})?;
let (host, port) = utils::connstring::connection_host_port(&me_conf);
Ok(format!(
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id} pageserver_connstr={pageserver_connstr}'",
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
))
}
@@ -765,20 +793,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: None,
pageserver_connstr: Some("no safekeeper_connstr".to_string()),
},
),
(
NodeId(1),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(Lsn(1)),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no pageserver_connstr".to_string()),
pageserver_connstr: None,
},
),
(
@@ -791,7 +805,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no commit_lsn".to_string()),
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
},
),
(
@@ -804,7 +817,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no commit_lsn".to_string()),
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
},
),
]));
@@ -834,7 +846,7 @@ mod tests {
.await;
let now = Utc::now().naive_utc();
dummy_connection_data.last_wal_receiver_data = Some((
ZenithFeedback {
ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: 1,
ps_applylsn: current_lsn,
@@ -860,7 +872,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -873,7 +884,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not advanced Lsn".to_string()),
pageserver_connstr: Some("not advanced Lsn (p)".to_string()),
},
),
(
@@ -888,7 +898,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not enough advanced Lsn".to_string()),
pageserver_connstr: Some("not enough advanced Lsn (p)".to_string()),
},
),
]));
@@ -920,7 +929,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
)]))
.expect("Expected one candidate selected out of the only data option, but got none");
@@ -933,9 +941,6 @@ mod tests {
assert!(only_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(only_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
let selected_lsn = 100_000;
let biggest_wal_candidate = data_manager_with_no_connection
@@ -950,7 +955,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("smaller commit_lsn".to_string()),
pageserver_connstr: Some("smaller commit_lsn (p)".to_string()),
},
),
(
@@ -963,7 +967,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -976,9 +979,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: None,
pageserver_connstr: Some(
"no safekeeper_connstr despite bigger commit_lsn".to_string(),
),
},
),
]))
@@ -995,9 +995,6 @@ mod tests {
assert!(biggest_wal_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(biggest_wal_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
Ok(())
}
@@ -1014,13 +1011,13 @@ mod tests {
let mut data_manager_with_connection = dummy_wal_connection_manager(&harness);
let connected_sk_id = NodeId(0);
let mut dummy_connection_data = dummy_connection_data(id, NodeId(0)).await;
let mut dummy_connection_data = dummy_connection_data(id, connected_sk_id).await;
let lagging_wal_timeout =
chrono::Duration::from_std(data_manager_with_connection.lagging_wal_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
dummy_connection_data.last_wal_receiver_data = Some((
ZenithFeedback {
ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: current_lsn.0,
ps_applylsn: 1,
@@ -1044,7 +1041,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -1057,7 +1053,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("advanced by Lsn safekeeper".to_string()),
pageserver_connstr: Some("advanced by Lsn safekeeper (p)".to_string()),
},
),
]);
@@ -1081,16 +1076,13 @@ mod tests {
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains("advanced by Lsn safekeeper"));
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains("advanced by Lsn safekeeper (p)"));
Ok(())
}
#[tokio::test]
async fn timeout_wal_over_threshcurrent_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshcurrent_candidate")?;
async fn timeout_wal_over_threshold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshold_current_candidate")?;
let current_lsn = Lsn(100_000).align();
let id = ZTenantTimelineId {
@@ -1108,36 +1100,19 @@ mod tests {
dummy_connection_data.connection_init_time = time_over_threshold;
data_manager_with_connection.wal_connection_data = Some(dummy_connection_data);
let new_lsn = Lsn(current_lsn.0 + data_manager_with_connection.max_lsn_wal_lag.get() + 1);
let over_threshcurrent_candidate = data_manager_with_connection
.select_connection_candidate(HashMap::from([
(
NodeId(0),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(new_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
NodeId(1),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not advanced by Lsn safekeeper".to_string()),
pageserver_connstr: Some("not advanced by Lsn safekeeper".to_string()),
},
),
]))
.select_connection_candidate(HashMap::from([(
NodeId(0),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
)]))
.expect(
"Expected one candidate selected out of multiple valid data options, but got none",
);
@@ -1157,9 +1132,6 @@ mod tests {
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
Ok(())
}
@@ -1186,7 +1158,6 @@ mod tests {
}
const DUMMY_SAFEKEEPER_CONNSTR: &str = "safekeeper_connstr";
const DUMMY_PAGESERVER_CONNSTR: &str = "pageserver_connstr";
// the function itself does not need async, but it spawns a tokio::task underneath and hence needs
// a runtime to avoid panicking
@@ -1194,9 +1165,8 @@ mod tests {
id: ZTenantTimelineId,
safekeeper_id: NodeId,
) -> WalConnectionData {
let dummy_connstr =
wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR, DUMMY_PAGESERVER_CONNSTR)
.expect("Failed to construct dummy wal producer connstr");
let dummy_connstr = wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR)
.expect("Failed to construct dummy wal producer connstr");
WalConnectionData {
safekeeper_id,
connection: WalReceiverConnection::open(

View File

@@ -19,7 +19,7 @@ use tokio_stream::StreamExt;
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
pq_proto::ReplicationFeedback,
zid::{NodeId, ZTenantTimelineId},
};
@@ -33,7 +33,7 @@ use crate::{
#[derive(Debug, Clone)]
pub enum WalConnectionEvent {
Started,
NewWal(ZenithFeedback),
NewWal(ReplicationFeedback),
End(Result<(), String>),
}
@@ -328,7 +328,7 @@ async fn handle_walreceiver_connection(
// Send zenith feedback message.
// Regular standby_status_update fields are put into this message.
let zenith_status_update = ZenithFeedback {
let zenith_status_update = ReplicationFeedback {
current_timeline_size: timeline.get_current_logical_size() as u64,
ps_writelsn: write_lsn,
ps_flushlsn: flush_lsn,

View File

@@ -27,9 +27,10 @@ pub struct ClientCredentials {
// Other Auth backends don't need it.
pub sni_data: Option<String>,
// cluster_option is passed as argument from options from url.
// To be used to determine cluster name in case sni_data is missing.
pub project_option: Option<String>,
// project_name is passed as an argument in the options from the url.
// If sni_data is missing, project_name is used to determine the cluster name.
// If sni_data is present, project_name and sni_data must match (otherwise an error is returned).
pub project_name: Option<String>,
}
impl ClientCredentials {
@@ -41,29 +42,42 @@ impl ClientCredentials {
#[derive(Debug, Error)]
pub enum ProjectNameError {
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '..&options=project:<project name>..'.")]
#[error("SNI is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '...&options=project%3D<project-name>...'.")]
Missing,
#[error("SNI is malformed.")]
Bad,
#[error("Inconsistent project name inferred from SNI and project option. String from SNI: '{0}', String from project option: '{1}'")]
Inconsistent(String, String),
}
impl UserFacingError for ProjectNameError {}
impl ClientCredentials {
/// Determine project name from SNI.
/// Determine the project name from SNI or from the project_name parameter of the options argument.
pub fn project_name(&self) -> Result<&str, ProjectNameError> {
let ret = match &self.sni_data {
//if sni_data exists, use it to determine project name
Some(sni_data) => {
sni_data
.split_once('.')
.ok_or(ProjectNameError::Bad)?
.0
// If both sni_data and project_name are set, check that they match;
// otherwise a ProjectNameError::Inconsistent error is returned.
if let Some(sni_data) = &self.sni_data {
let project_name_from_sni_data =
sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0;
if let Some(project_name_from_options) = &self.project_name {
if !project_name_from_options.eq(project_name_from_sni_data) {
return Err(ProjectNameError::Inconsistent(
project_name_from_sni_data.to_string(),
project_name_from_options.to_string(),
));
}
}
//otherwise use project_option if it was manually set thought ..&options=project:<name> parameter
}
// determine the project name from self.sni_data if it exists, otherwise from self.project_name.
let ret = match &self.sni_data {
// if sni_data exists, use it to determine project name
Some(sni_data) => sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0,
// otherwise use project_name if it was manually set through the options parameter.
None => self
.project_option
.project_name
.as_ref()
.ok_or(ProjectNameError::Missing)?
.as_str(),
@@ -84,17 +98,13 @@ impl TryFrom<HashMap<String, String>> for ClientCredentials {
let user = get_param("user")?;
let dbname = get_param("database")?;
let project = get_param("project");
let project_option = match project {
Ok(project) => Some(project),
Err(_) => None,
};
let project_name = get_param("project").ok();
Ok(Self {
user,
dbname,
sni_data: None,
project_option,
project_name,
})
}
}
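A minimal usage sketch (not part of the diff) of the resolution rules above, assuming all ClientCredentials fields shown in this file are public strings and using an illustrative SNI host name:
// SNI and the option agree, so the project name comes from the first SNI label.
let creds = ClientCredentials {
    user: "john_doe".to_string(),
    dbname: "postgres".to_string(),
    sni_data: Some("myproject.cloud.example.com".to_string()),
    // As passed by the client via '...&options=project%3Dmyproject...'.
    project_name: Some("myproject".to_string()),
};
assert_eq!(creds.project_name().unwrap(), "myproject");
// With sni_data = Some("other.cloud.example.com") instead, project_name() would
// return ProjectNameError::Inconsistent("other", "myproject").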

View File

@@ -1,251 +0,0 @@
//! Declaration of Cloud API V2.
use crate::{
auth::{self, AuthFlow},
compute, scram,
};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::auth::ClientCredentials;
use crate::stream::PqStream;
use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
#[derive(Debug, Error)]
pub enum ConsoleAuthError {
// We shouldn't include the actual secret here.
#[error("Bad authentication secret")]
BadSecret,
#[error("Bad client credentials: {0:?}")]
BadCredentials(crate::auth::ClientCredentials),
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass ..&options=cluster:<project name>.. parameter")]
SniMissingAndProjectNameMissing,
#[error("Unexpected SNI content")]
SniWrong,
#[error(transparent)]
BadUrl(#[from] url::ParseError),
#[error(transparent)]
Io(#[from] std::io::Error),
/// HTTP status (other than 200) returned by the console.
#[error("Console responded with an HTTP status: {0}")]
HttpStatus(reqwest::StatusCode),
#[error(transparent)]
Transport(#[from] reqwest::Error),
#[error("Console responded with a malformed JSON: '{0}'")]
MalformedResponse(#[from] serde_json::Error),
#[error("Console responded with a malformed compute address: '{0}'")]
MalformedComputeAddress(String),
}
#[derive(Serialize, Deserialize, Debug)]
struct GetRoleSecretResponse {
role_secret: String,
}
#[derive(Serialize, Deserialize, Debug)]
struct GetWakeComputeResponse {
address: String,
}
/// Auth secret which is managed by the cloud.
pub enum AuthInfo {
/// Md5 hash of user's password.
Md5([u8; 16]),
/// [SCRAM](crate::scram) authentication info.
Scram(scram::ServerSecret),
}
/// Compute node connection params provided by the cloud.
/// Note how it implements serde traits, since we receive it over the wire.
#[derive(Serialize, Deserialize, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
/// [Cloud API V1](super::legacy) returns cleartext password,
/// but [Cloud API V2](super::api) implements [SCRAM](crate::scram)
/// authentication, so we can leverage this method and cope without password.
pub password: Option<String>,
}
// Manually implement debug to omit personal and sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.finish()
}
}
impl From<DatabaseInfo> for tokio_postgres::Config {
fn from(db_info: DatabaseInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
config
}
}
async fn get_auth_info(
auth_endpoint: &str,
user: &str,
cluster: &str,
) -> Result<AuthInfo, ConsoleAuthError> {
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_get_role_secret"))?;
url.query_pairs_mut()
.append_pair("project", cluster)
.append_pair("role", user);
// TODO: use a proper logger
println!("cplane request: {}", url);
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetRoleSecretResponse = serde_json::from_str(resp.text().await?.as_str())?;
scram::ServerSecret::parse(response.role_secret.as_str())
.map(AuthInfo::Scram)
.ok_or(ConsoleAuthError::BadSecret)
}
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
auth_endpoint: &str,
cluster: &str,
) -> Result<(String, u16), ConsoleAuthError> {
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_wake_compute"))?;
url.query_pairs_mut().append_pair("project", cluster);
// TODO: use a proper logger
println!("cplane request: {}", url);
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetWakeComputeResponse = serde_json::from_str(resp.text().await?.as_str())?;
let (host, port) = response
.address
.split_once(':')
.ok_or_else(|| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
let port: u16 = port
.parse()
.map_err(|_| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
Ok((host.to_string(), port))
}
pub async fn handle_user(
auth_endpoint: &str,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
creds: &ClientCredentials,
) -> Result<compute::NodeInfo, crate::auth::AuthError> {
// Determine cluster name from SNI (creds.sni_data) or from creds.cluster_option.
let cluster = match &creds.sni_data {
//if sni_data exists, use it
Some(sni_data) => {
sni_data
.split_once('.')
.ok_or(ConsoleAuthError::SniWrong)?
.0
}
//otherwise use cluster_option if it was manually set thought ..&options=cluster:<name> parameter
None => creds
.cluster_option
.as_ref()
.ok_or(ConsoleAuthError::SniMissingAndProjectNameMissing)?
.as_str(),
};
let user = creds.user.as_str();
// Step 1: get the auth secret
let auth_info = get_auth_info(auth_endpoint, user, cluster).await?;
let flow = AuthFlow::new(client);
let scram_keys = match auth_info {
AuthInfo::Md5(_) => {
// TODO: decide if we should support MD5 in api v2
return Err(crate::auth::AuthErrorImpl::auth_failed("MD5 is not supported").into());
}
AuthInfo::Scram(secret) => {
let scram = auth::Scram(&secret);
Some(compute::ScramKeys {
client_key: flow.begin(scram).await?.authenticate().await?.as_bytes(),
server_key: secret.server_key.as_bytes(),
})
}
};
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
// Step 2: wake compute
let (host, port) = wake_compute(auth_endpoint, cluster).await?;
Ok(compute::NodeInfo {
db_info: DatabaseInfo {
host,
port,
dbname: creds.dbname.clone(),
user: creds.user.clone(),
password: None,
},
scram_keys,
})
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn parse_db_info() -> anyhow::Result<()> {
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"password": "password",
}))?;
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
}))?;
Ok(())
}
}

View File

@@ -10,6 +10,7 @@ use remote_storage::RemoteStorageConfig;
use std::fs::{self, File};
use std::io::{ErrorKind, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::thread;
use tokio::sync::mpsc;
use toml_edit::Document;
@@ -27,6 +28,7 @@ use safekeeper::timeline::GlobalTimelines;
use safekeeper::wal_backup;
use safekeeper::wal_service;
use safekeeper::SafeKeeperConf;
use utils::auth::JwtAuth;
use utils::{
http::endpoint, logging, project_git_version, shutdown::exit_now, signals, tcp_listener,
zid::NodeId,
@@ -132,6 +134,12 @@ fn main() -> anyhow::Result<()> {
.default_missing_value("true")
.help("Enable/disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring WAL backup horizon."),
)
.arg(
Arg::new("auth-validation-public-key-path")
.long("auth-validation-public-key-path")
.takes_value(true)
.help("Path to an RSA .pem public key which is used to check JWT tokens")
)
.get_matches();
if let Some(addr) = arg_matches.value_of("dump-control-file") {
@@ -204,6 +212,10 @@ fn main() -> anyhow::Result<()> {
.parse()
.context("failed to parse bool enable-s3-offload bool")?;
conf.auth_validation_public_key_path = arg_matches
.value_of("auth-validation-public-key-path")
.map(PathBuf::from);
start_safekeeper(conf, given_id, arg_matches.is_present("init"))
}
@@ -239,6 +251,19 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
e
})?;
let auth = match conf.auth_validation_public_key_path.as_ref() {
None => {
info!("Auth is disabled");
None
}
Some(path) => {
info!("Loading JWT auth key from {}", path.display());
Some(Arc::new(
JwtAuth::from_key_path(path).context("failed to load the auth key")?,
))
}
};
// XXX: Don't spawn any threads before daemonizing!
if conf.daemonize {
info!("daemonizing...");
@@ -280,8 +305,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("http_endpoint_thread".into())
.spawn(|| {
// TODO authentication
let router = http::make_router(conf_);
let router = http::make_router(conf_, auth);
endpoint::serve_thread_main(
router,
http_listener,
@@ -295,6 +319,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
let safekeeper_thread = thread::Builder::new()
.name("Safekeeper thread".into())
.spawn(|| {
// TODO: add auth
if let Err(e) = wal_service::thread_main(conf_cloned, pg_listener) {
info!("safekeeper thread terminated: {e}");
}
@@ -309,6 +334,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("broker thread".into())
.spawn(|| {
// TODO: add auth?
broker::thread_main(conf_);
})?,
);
@@ -321,6 +347,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("WAL removal thread".into())
.spawn(|| {
// TODO: add auth?
remove_wal::thread_main(conf_);
})?,
);
@@ -330,6 +357,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("wal backup launcher thread".into())
.spawn(move || {
// TODO: add auth?
wal_backup::wal_backup_launcher_thread_main(conf_, wal_backup_launcher_rx);
})?,
);

View File

@@ -4,9 +4,7 @@ use anyhow::anyhow;
use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use etcd_broker::Client;
use etcd_broker::PutOptions;
use etcd_broker::SkTimelineSubscriptionKind;
use etcd_broker::subscription_value::SkTimelineInfo;
use std::time::Duration;
use tokio::spawn;
use tokio::task::JoinHandle;
@@ -15,6 +13,10 @@ use tracing::*;
use url::Url;
use crate::{timeline::GlobalTimelines, SafeKeeperConf};
use etcd_broker::{
subscription_key::{OperationKind, SkOperationKind, SubscriptionKey},
Client, PutOptions,
};
use utils::zid::{NodeId, ZTenantTimelineId};
const RETRY_INTERVAL_MSEC: u64 = 1000;
@@ -43,7 +45,7 @@ fn timeline_safekeeper_path(
) -> String {
format!(
"{}/{sk_id}",
SkTimelineSubscriptionKind::timeline(broker_etcd_prefix, zttid).watch_key()
SubscriptionKey::sk_timeline_info(broker_etcd_prefix, zttid).watch_key()
)
}
@@ -90,7 +92,7 @@ impl ElectionLeader {
}
}
pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {
pub async fn get_leader(req: &Election, leader: &mut Option<ElectionLeader>) -> Result<()> {
let mut client = Client::connect(req.broker_endpoints.clone(), None)
.await
.context("Could not connect to etcd")?;
@@ -102,22 +104,27 @@ pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {
let lease_id = lease.map(|l| l.id()).unwrap();
let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
// kill previous keepalive, if any
if let Some(l) = leader.take() {
l.give_up().await;
}
if let Err(e) = client
let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
// immediately save handle to kill task if we get canceled below
*leader = Some(ElectionLeader {
client: client.clone(),
keep_alive,
});
client
.campaign(
req.election_name.clone(),
req.candidate_name.clone(),
lease_id,
)
.await
{
keep_alive.abort();
let _ = keep_alive.await;
return Err(e.into());
}
.await?;
Ok(ElectionLeader { client, keep_alive })
Ok(())
}
async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
@@ -143,20 +150,8 @@ async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
}
}
pub fn get_campaign_name(
election_name: String,
broker_prefix: String,
timeline_id: &ZTenantTimelineId,
) -> String {
return format!(
"{}/{}",
SkTimelineSubscriptionKind::timeline(broker_prefix, *timeline_id).watch_key(),
election_name
);
}
pub fn get_candiate_name(system_id: NodeId) -> String {
format!("id_{}", system_id)
format!("id_{system_id}")
}
/// Push once in a while data about all active timelines to the broker.
@@ -208,9 +203,20 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
let mut client = Client::connect(&conf.broker_endpoints, None).await?;
let mut subscription = etcd_broker::subscribe_to_safekeeper_timeline_updates(
let mut subscription = etcd_broker::subscribe_for_values(
&mut client,
SkTimelineSubscriptionKind::all(conf.broker_etcd_prefix.clone()),
SubscriptionKey::all(conf.broker_etcd_prefix.clone()),
|full_key, value_str| {
if full_key.operation == OperationKind::Safekeeper(SkOperationKind::TimelineInfo) {
match serde_json::from_str::<SkTimelineInfo>(value_str) {
Ok(new_info) => return Some(new_info),
Err(e) => {
error!("Failed to parse timeline info from value str '{value_str}': {e}")
}
}
}
None
},
)
.await
.context("failed to subscribe for safekeeper info")?;

View File

@@ -1,305 +0,0 @@
//!
//! Callmemaybe module is responsible for periodically requesting
//! pageserver to initiate wal streaming.
//!
//! Other threads can use CallmeEvent messages to subscribe or unsubscribe
//! from the call list.
//!
use crate::SafeKeeperConf;
use anyhow::{Context, Result};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::{Duration, Instant};
use tokio::runtime;
use tokio::sync::mpsc::UnboundedReceiver;
use tokio::task;
use tokio_postgres::NoTls;
use tracing::*;
use utils::{
connstring::connection_host_port,
zid::{ZTenantId, ZTimelineId},
};
async fn request_callback(
pageserver_connstr: String,
listen_pg_addr_str: String,
timelineid: ZTimelineId,
tenantid: ZTenantId,
) -> Result<()> {
info!(
"callmemaybe request_callback Connecting to pageserver {}",
&pageserver_connstr
);
let (client, connection) = tokio_postgres::connect(&pageserver_connstr, NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
// use Config parsing because SockAddr parsing doesn't allow using host names instead of ip addresses
let me_connstr = format!("postgresql://no_user@{}/no_db", listen_pg_addr_str);
let me_conf: postgres::config::Config = me_connstr.parse().unwrap();
let (host, port) = connection_host_port(&me_conf);
// pageserver connstr is needed to be able to distinguish between different pageservers
// it is required to correctly manage callmemaybe subscriptions when more than one pageserver is involved
// TODO it is better to use some sort of a unique id instead of connection string, see https://github.com/zenithdb/zenith/issues/1105
let callme = format!(
"callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={} pageserver_connstr={}'",
tenantid, timelineid, host, port, timelineid, tenantid, pageserver_connstr,
);
let _ = client.simple_query(&callme).await?;
Ok(())
}
pub fn thread_main(conf: SafeKeeperConf, rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
let runtime = runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
runtime.block_on(main_loop(conf, rx))
}
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct SubscriptionStateKey {
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
pageserver_connstr: String,
}
impl SubscriptionStateKey {
pub fn new(tenant_id: ZTenantId, timeline_id: ZTimelineId, pageserver_connstr: String) -> Self {
Self {
tenant_id,
timeline_id,
pageserver_connstr,
}
}
}
/// Messages to the callmemaybe thread
#[derive(Debug)]
pub enum CallmeEvent {
// add new subscription to the list
Subscribe(SubscriptionStateKey),
// remove the subscription from the list
Unsubscribe(SubscriptionStateKey),
// don't serve this subscription, but keep it in the list
Pause(SubscriptionStateKey),
// resume this subscription, if it exists,
// but don't create a new one if it is gone
Resume(SubscriptionStateKey),
// TODO how do we delete from subscriptions?
}
#[derive(Debug)]
struct SubscriptionState {
tenantid: ZTenantId,
timelineid: ZTimelineId,
pageserver_connstr: String,
handle: Option<task::JoinHandle<()>>,
last_call_time: Instant,
paused: bool,
}
impl SubscriptionState {
fn new(
tenantid: ZTenantId,
timelineid: ZTimelineId,
pageserver_connstr: String,
) -> SubscriptionState {
SubscriptionState {
tenantid,
timelineid,
pageserver_connstr,
handle: None,
last_call_time: Instant::now(),
paused: false,
}
}
fn pause(&mut self) {
self.paused = true;
self.abort_handle();
}
fn resume(&mut self) {
self.paused = false;
}
// Most likely, the task has already completed successfully
// and abort() won't have any effect.
fn abort_handle(&mut self) {
if let Some(handle) = self.handle.take() {
handle.abort();
let timelineid = self.timelineid;
let tenantid = self.tenantid;
let pageserver_connstr = self.pageserver_connstr.clone();
tokio::spawn(async move {
if let Err(err) = handle.await {
if err.is_cancelled() {
warn!("callback task for timelineid={} tenantid={} was cancelled before spawning a new one",
timelineid, tenantid);
} else {
error!(
"callback task for timelineid={} tenantid={} pageserver_connstr={} failed: {}",
timelineid, tenantid, pageserver_connstr, err
);
}
}
});
}
}
fn call(&mut self, recall_period: Duration, listen_pg_addr: String) {
// Ignore call request if this subscription is paused
if self.paused {
debug!(
"ignore call request for paused subscription \
tenantid: {}, timelineid: {}",
self.tenantid, self.timelineid
);
return;
}
// Check if it is too early to recall
if self.handle.is_some() && self.last_call_time.elapsed() < recall_period {
debug!(
"too early to recall. self.last_call_time.elapsed(): {:?}, recall_period: {:?} \
tenantid: {}, timelineid: {}",
self.last_call_time.elapsed(), recall_period, self.tenantid, self.timelineid
);
return;
}
// If the previous task didn't complete within recall_period, it must be hanging,
// so don't wait for it forever; just abort it and try again.
self.abort_handle();
let timelineid = self.timelineid;
let tenantid = self.tenantid;
let pageserver_connstr = self.pageserver_connstr.clone();
self.handle = Some(tokio::spawn(async move {
request_callback(pageserver_connstr, listen_pg_addr, timelineid, tenantid)
.await
.unwrap_or_else(|e| {
error!(
"callback task for timelineid={} tenantid={} failed: {}",
timelineid, tenantid, e
)
});
}));
// Update last_call_time
self.last_call_time = Instant::now();
info!(
"new call spawned. last call time {:?} tenantid: {}, timelineid: {}",
self.last_call_time, self.tenantid, self.timelineid
);
}
}
impl Drop for SubscriptionState {
fn drop(&mut self) {
self.abort_handle();
}
}
pub async fn main_loop(conf: SafeKeeperConf, mut rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
let subscriptions: Mutex<HashMap<SubscriptionStateKey, SubscriptionState>> =
Mutex::new(HashMap::new());
let mut ticker = tokio::time::interval(conf.recall_period);
loop {
tokio::select! {
request = rx.recv() =>
{
match request.context("done")?
{
CallmeEvent::Subscribe(key) =>
{
let _enter = info_span!("callmemaybe: subscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
let mut subscriptions = subscriptions.lock().unwrap();
// XXX this clone is ugly, is there a way to use the trick with Borrow trait with entry API?
// when we switch to node id instead of the connection string key will be Copy and there will be no need to clone
match subscriptions.entry(key.clone()) {
Entry::Occupied(_) => {
// Do nothing if the subscription already exists.
// If it is paused, a replication connection is already established.
// If it is not paused, it will be polled together with the other subscriptions when the timeout expires.
// This can occur when the replication channel is established before the subscription is added.
info!(
"subscription already exists",
);
}
Entry::Vacant(entry) => {
let subscription = entry.insert(SubscriptionState::new(
key.tenant_id,
key.timeline_id,
key.pageserver_connstr,
));
subscription.call(conf.recall_period, conf.listen_pg_addr.clone());
}
}
},
CallmeEvent::Unsubscribe(key) => {
let _enter = debug_span!("callmemaybe: unsubscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
debug!("unsubscribe");
let mut subscriptions = subscriptions.lock().unwrap();
subscriptions.remove(&key);
},
CallmeEvent::Pause(key) => {
let _enter = debug_span!("callmemaybe: pause", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
let mut subscriptions = subscriptions.lock().unwrap();
// If a pause is received when no corresponding subscription exists, someone started replication
// without using callmemaybe, so we create a subscription and pause it.
// In the tenant relocation scenario the subscribe call is executed after the pause, when compute is restarted.
// In that case there is no need to create a new subscription or unpause the existing one.
match subscriptions.entry(key.clone()) {
Entry::Occupied(mut sub) => {
debug!("pause existing");
sub.get_mut().pause();
}
Entry::Vacant(entry) => {
debug!("create paused");
let subscription = entry.insert(SubscriptionState::new(
key.tenant_id,
key.timeline_id,
key.pageserver_connstr,
));
subscription.pause();
}
}
},
CallmeEvent::Resume(key) => {
debug!(
"callmemaybe. thread_main. resume callback request for timelineid={} tenantid={} pageserver_connstr={}",
key.timeline_id, key.tenant_id, key.pageserver_connstr,
);
let mut subscriptions = subscriptions.lock().unwrap();
if let Some(sub) = subscriptions.get_mut(&key)
{
sub.resume();
};
},
}
},
_ = ticker.tick() => {
let _enter = debug_span!("callmemaybe: tick").entered();
let mut subscriptions = subscriptions.lock().unwrap();
for (_, state) in subscriptions.iter_mut() {
state.call(conf.recall_period, conf.listen_pg_addr.clone());
}
},
};
}
}

View File

@@ -29,12 +29,11 @@ pub struct SafekeeperPostgresHandler {
pub ztenantid: Option<ZTenantId>,
pub ztimelineid: Option<ZTimelineId>,
pub timeline: Option<Arc<Timeline>>,
pageserver_connstr: Option<String>,
}
/// Parsed Postgres command.
enum SafekeeperPostgresCommand {
StartWalPush { pageserver_connstr: Option<String> },
StartWalPush,
StartReplication { start_lsn: Lsn },
IdentifySystem,
JSONCtrl { cmd: AppendLogicalMessage },
@@ -42,11 +41,7 @@ enum SafekeeperPostgresCommand {
fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
if cmd.starts_with("START_WAL_PUSH") {
let re = Regex::new(r"START_WAL_PUSH(?: (.+))?").unwrap();
let caps = re.captures(cmd).unwrap();
let pageserver_connstr = caps.get(1).map(|m| m.as_str().to_owned());
Ok(SafekeeperPostgresCommand::StartWalPush { pageserver_connstr })
Ok(SafekeeperPostgresCommand::StartWalPush)
} else if cmd.starts_with("START_REPLICATION") {
let re =
Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap();
@@ -86,8 +81,6 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
self.appname = Some(app_name.clone());
}
self.pageserver_connstr = params.get("pageserver_connstr").cloned();
Ok(())
} else {
bail!("Safekeeper received unexpected initial message: {:?}", sm);
@@ -113,14 +106,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
}
match cmd {
SafekeeperPostgresCommand::StartWalPush { pageserver_connstr } => {
ReceiveWalConn::new(pgb, pageserver_connstr)
SafekeeperPostgresCommand::StartWalPush => {
ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn")?;
}
SafekeeperPostgresCommand::StartReplication { start_lsn } => {
ReplicationConn::new(pgb)
.run(self, pgb, start_lsn, self.pageserver_connstr.clone())
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn")?;
}
SafekeeperPostgresCommand::IdentifySystem => {
@@ -142,7 +135,6 @@ impl SafekeeperPostgresHandler {
ztenantid: None,
ztimelineid: None,
timeline: None,
pageserver_connstr: None,
}
}

View File

@@ -1,9 +1,9 @@
use etcd_broker::SkTimelineInfo;
use hyper::{Body, Request, Response, StatusCode};
use hyper::{Body, Request, Response, StatusCode, Uri};
use once_cell::sync::Lazy;
use serde::Serialize;
use serde::Serializer;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fmt::Display;
use std::sync::Arc;
@@ -11,9 +11,11 @@ use crate::safekeeper::Term;
use crate::safekeeper::TermHistory;
use crate::timeline::{GlobalTimelines, TimelineDeleteForceResult};
use crate::SafeKeeperConf;
use etcd_broker::subscription_value::SkTimelineInfo;
use utils::{
auth::JwtAuth,
http::{
endpoint,
endpoint::{self, auth_middleware, check_permission},
error::ApiError,
json::{json_request, json_response},
request::{ensure_no_body, parse_request_param},
@@ -32,6 +34,7 @@ struct SafekeeperStatus {
/// Healthcheck handler.
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let conf = get_conf(&request);
let status = SafekeeperStatus { id: conf.my_id };
json_response(StatusCode::OK, status)
@@ -91,6 +94,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
let (inmem, state) = tli.get_state();
@@ -125,6 +129,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
tenant_id: request_data.tenant_id,
timeline_id: request_data.timeline_id,
};
check_permission(&request, Some(zttid.tenant_id))?;
GlobalTimelines::create(get_conf(&request), zttid, request_data.peer_ids)
.map_err(ApiError::from_err)?;
@@ -145,6 +150,7 @@ async fn timeline_delete_force_handler(
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
ensure_no_body(&mut request).await?;
json_response(
StatusCode::OK,
@@ -160,6 +166,7 @@ async fn tenant_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
ensure_no_body(&mut request).await?;
json_response(
StatusCode::OK,
@@ -178,6 +185,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
let safekeeper_info: SkTimelineInfo = json_request(&mut request).await?;
let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
@@ -188,15 +196,33 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
}
/// Safekeeper http router.
pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
let router = endpoint::make_router();
pub fn make_router(
conf: SafeKeeperConf,
auth: Option<Arc<JwtAuth>>,
) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router();
if auth.is_some() {
router = router.middleware(auth_middleware(|request| {
#[allow(clippy::mutable_key_type)]
static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> =
Lazy::new(|| ["/v1/status"].iter().map(|v| v.parse().unwrap()).collect());
if ALLOWLIST_ROUTES.contains(request.uri()) {
None
} else {
// Option<Arc<JwtAuth>> is always provided as data below, hence unwrap().
request.data::<Option<Arc<JwtAuth>>>().unwrap().as_deref()
}
}))
}
router
.data(Arc::new(conf))
.data(auth)
.get("/v1/status", status_handler)
.get(
"/v1/timeline/:tenant_id/:timeline_id",
timeline_status_handler,
)
// Will be used in the future instead of implicit timeline creation
.post("/v1/timeline", timeline_create_handler)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id",

View File

@@ -57,6 +57,7 @@ pub struct SafeKeeperConf {
pub my_id: NodeId,
pub broker_endpoints: Vec<Url>,
pub broker_etcd_prefix: String,
pub auth_validation_public_key_path: Option<PathBuf>,
}
impl SafeKeeperConf {
@@ -88,6 +89,7 @@ impl Default for SafeKeeperConf {
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
backup_runtime_threads: DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
wal_backup_enabled: true,
auth_validation_public_key_path: None,
}
}
}

View File

@@ -242,9 +242,9 @@ impl Collector for TimelineCollector {
let timeline_id = tli.zttid.timeline_id.to_string();
let labels = &[tenant_id.as_str(), timeline_id.as_str()];
let mut most_advanced: Option<utils::pq_proto::ZenithFeedback> = None;
let mut most_advanced: Option<utils::pq_proto::ReplicationFeedback> = None;
for replica in tli.replicas.iter() {
if let Some(replica_feedback) = replica.zenith_feedback {
if let Some(replica_feedback) = replica.pageserver_feedback {
if let Some(current) = most_advanced {
if current.ps_writelsn < replica_feedback.ps_writelsn {
most_advanced = Some(replica_feedback);

View File

@@ -32,22 +32,14 @@ pub struct ReceiveWalConn<'pg> {
pg_backend: &'pg mut PostgresBackend,
/// The cached result of `pg_backend.socket().peer_addr()` (roughly)
peer_addr: SocketAddr,
/// Pageserver connection string forwarded from compute
/// NOTE that it is allowed to operate without a pageserver.
/// So if compute has no pageserver configured do not use it.
pageserver_connstr: Option<String>,
}
impl<'pg> ReceiveWalConn<'pg> {
pub fn new(
pg: &'pg mut PostgresBackend,
pageserver_connstr: Option<String>,
) -> ReceiveWalConn<'pg> {
pub fn new(pg: &'pg mut PostgresBackend) -> ReceiveWalConn<'pg> {
let peer_addr = *pg.get_peer_addr();
ReceiveWalConn {
pg_backend: pg,
peer_addr,
pageserver_connstr,
}
}
@@ -120,9 +112,7 @@ impl<'pg> ReceiveWalConn<'pg> {
// Register the connection and defer unregister. Do that only
// after processing first message, as it sets wal_seg_size,
// wanted by many.
spg.timeline
.get()
.on_compute_connect(self.pageserver_connstr.as_ref())?;
spg.timeline.get().on_compute_connect()?;
_guard = Some(ComputeConnectionGuard {
timeline: Arc::clone(spg.timeline.get()),
});

View File

@@ -4,7 +4,7 @@ use anyhow::{bail, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use etcd_broker::SkTimelineInfo;
use etcd_broker::subscription_value::SkTimelineInfo;
use postgres_ffi::xlog_utils::TimeLineID;
use postgres_ffi::xlog_utils::XLogSegNo;
@@ -23,7 +23,7 @@ use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
use utils::{
bin_ser::LeSer,
lsn::Lsn,
pq_proto::{SystemId, ZenithFeedback},
pq_proto::{ReplicationFeedback, SystemId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};
@@ -348,7 +348,7 @@ pub struct AppendResponse {
// a criterion for walproposer --sync mode exit
pub commit_lsn: Lsn,
pub hs_feedback: HotStandbyFeedback,
pub zenith_feedback: ZenithFeedback,
pub pageserver_feedback: ReplicationFeedback,
}
impl AppendResponse {
@@ -358,7 +358,7 @@ impl AppendResponse {
flush_lsn: Lsn(0),
commit_lsn: Lsn(0),
hs_feedback: HotStandbyFeedback::empty(),
zenith_feedback: ZenithFeedback::empty(),
pageserver_feedback: ReplicationFeedback::empty(),
}
}
}
@@ -476,7 +476,7 @@ impl AcceptorProposerMessage {
buf.put_u64_le(msg.hs_feedback.xmin);
buf.put_u64_le(msg.hs_feedback.catalog_xmin);
msg.zenith_feedback.serialize(buf)?
msg.pageserver_feedback.serialize(buf)?
}
}
@@ -677,7 +677,7 @@ where
commit_lsn: self.state.commit_lsn,
// will be filled by the upper code to avoid bothering safekeeper
hs_feedback: HotStandbyFeedback::empty(),
zenith_feedback: ZenithFeedback::empty(),
pageserver_feedback: ReplicationFeedback::empty(),
};
trace!("formed AppendResponse {:?}", ar);
ar

View File

@@ -21,7 +21,7 @@ use utils::{
bin_ser::BeSer,
lsn::Lsn,
postgres_backend::PostgresBackend,
pq_proto::{BeMessage, FeMessage, WalSndKeepAlive, XLogDataBody, ZenithFeedback},
pq_proto::{BeMessage, FeMessage, ReplicationFeedback, WalSndKeepAlive, XLogDataBody},
sock_split::ReadStream,
};
@@ -29,7 +29,7 @@ use utils::{
const HOT_STANDBY_FEEDBACK_TAG_BYTE: u8 = b'h';
const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r';
// zenith extension of replication protocol
const ZENITH_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
type FullTransactionId = u64;
@@ -122,15 +122,15 @@ impl ReplicationConn {
warn!("unexpected StandbyReply. Read-only postgres replicas are not supported in safekeepers yet.");
// timeline.update_replica_state(replica_id, Some(state));
}
Some(ZENITH_STATUS_UPDATE_TAG_BYTE) => {
Some(NEON_STATUS_UPDATE_TAG_BYTE) => {
// Note: deserializing is on m[9..] because we skip the tag byte and len bytes.
let buf = Bytes::copy_from_slice(&m[9..]);
let reply = ZenithFeedback::parse(buf);
let reply = ReplicationFeedback::parse(buf);
trace!("ZenithFeedback is {:?}", reply);
// Only pageserver sends ZenithFeedback, so set the flag.
trace!("ReplicationFeedback is {:?}", reply);
// Only pageserver sends ReplicationFeedback, so set the flag.
// This replica is the source of information to resend to compute.
state.zenith_feedback = Some(reply);
state.pageserver_feedback = Some(reply);
timeline.update_replica_state(replica_id, state);
}
@@ -162,9 +162,8 @@ impl ReplicationConn {
spg: &mut SafekeeperPostgresHandler,
pgb: &mut PostgresBackend,
mut start_pos: Lsn,
pageserver_connstr: Option<String>,
) -> Result<()> {
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap(), pageserver_connstr = %pageserver_connstr.as_deref().unwrap_or_default()).entered();
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap()).entered();
// spawn the background thread which receives HotStandbyFeedback messages.
let bg_timeline = Arc::clone(spg.timeline.get());

View File

@@ -3,7 +3,7 @@
use anyhow::{bail, Context, Result};
use etcd_broker::SkTimelineInfo;
use etcd_broker::subscription_value::SkTimelineInfo;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::XLogSegNo;
@@ -21,7 +21,7 @@ use tracing::*;
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
pq_proto::ReplicationFeedback,
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};
@@ -48,8 +48,8 @@ pub struct ReplicaState {
pub remote_consistent_lsn: Lsn,
/// combined hot standby feedback from all replicas
pub hs_feedback: HotStandbyFeedback,
/// Zenith specific feedback received from pageserver, if any
pub zenith_feedback: Option<ZenithFeedback>,
/// Replication specific feedback received from pageserver, if any
pub pageserver_feedback: Option<ReplicationFeedback>,
}
impl Default for ReplicaState {
@@ -68,7 +68,7 @@ impl ReplicaState {
xmin: u64::MAX,
catalog_xmin: u64::MAX,
},
zenith_feedback: None,
pageserver_feedback: None,
}
}
}
@@ -95,7 +95,6 @@ struct SharedState {
/// when tli is inactive instead of having this flag.
active: bool,
num_computes: u32,
pageserver_connstr: Option<String>,
last_removed_segno: XLogSegNo,
}
@@ -119,7 +118,6 @@ impl SharedState {
wal_backup_active: false,
active: false,
num_computes: 0,
pageserver_connstr: None,
last_removed_segno: 0,
})
}
@@ -139,7 +137,6 @@ impl SharedState {
wal_backup_active: false,
active: false,
num_computes: 0,
pageserver_connstr: None,
last_removed_segno: 0,
})
}
@@ -152,8 +149,12 @@ impl SharedState {
/// Mark timeline active/inactive and return whether s3 offloading requires
/// start/stop action.
fn update_status(&mut self) -> bool {
self.active = self.is_active();
fn update_status(&mut self, ttid: ZTenantTimelineId) -> bool {
let is_active = self.is_active();
if self.active != is_active {
info!("timeline {} active={} now", ttid, is_active);
}
self.active = is_active;
self.is_wal_backup_action_pending()
}
@@ -190,33 +191,10 @@ impl SharedState {
self.wal_backup_active
}
/// Activate timeline's walsender: start/change timeline information propagated into etcd for further pageserver connections.
fn activate_walsender(
&mut self,
zttid: &ZTenantTimelineId,
new_pageserver_connstr: Option<String>,
) {
if self.pageserver_connstr != new_pageserver_connstr {
self.deactivate_walsender(zttid);
if new_pageserver_connstr.is_some() {
info!(
"timeline {} has activated its walsender with connstr {new_pageserver_connstr:?}",
zttid.timeline_id,
);
}
self.pageserver_connstr = new_pageserver_connstr;
}
}
/// Deactivate the timeline: stop sending the timeline data into etcd, so no pageserver can connect for WAL streaming.
fn deactivate_walsender(&mut self, zttid: &ZTenantTimelineId) {
if let Some(pageserver_connstr) = self.pageserver_connstr.take() {
info!(
"timeline {} had deactivated its wallsender with connstr {pageserver_connstr:?}",
zttid.timeline_id,
)
}
// Can this safekeeper offload to s3? Recently joined safekeepers might not
// have necessary WAL.
fn can_wal_backup(&self) -> bool {
self.sk.state.local_start_lsn <= self.sk.inmem.backup_lsn
}
fn get_wal_seg_size(&self) -> usize {
@@ -243,25 +221,25 @@ impl SharedState {
// we need to know which pageserver compute node considers to be main.
// See https://github.com/zenithdb/zenith/issues/1171
//
if let Some(zenith_feedback) = state.zenith_feedback {
if let Some(acc_feedback) = acc.zenith_feedback {
if acc_feedback.ps_writelsn < zenith_feedback.ps_writelsn {
if let Some(pageserver_feedback) = state.pageserver_feedback {
if let Some(acc_feedback) = acc.pageserver_feedback {
if acc_feedback.ps_writelsn < pageserver_feedback.ps_writelsn {
warn!("More than one pageserver is streaming WAL for the timeline. Feedback resolving is not fully supported yet.");
acc.zenith_feedback = Some(zenith_feedback);
acc.pageserver_feedback = Some(pageserver_feedback);
}
} else {
acc.zenith_feedback = Some(zenith_feedback);
acc.pageserver_feedback = Some(pageserver_feedback);
}
// last lsn received by pageserver
// FIXME if multiple pageservers are streaming WAL, last_received_lsn must be tracked per pageserver.
// See https://github.com/zenithdb/zenith/issues/1171
acc.last_received_lsn = Lsn::from(zenith_feedback.ps_writelsn);
acc.last_received_lsn = Lsn::from(pageserver_feedback.ps_writelsn);
// When at least one pageserver has preserved data up to remote_consistent_lsn,
// safekeeper is free to delete it, so choose max of all pageservers.
acc.remote_consistent_lsn = max(
Lsn::from(zenith_feedback.ps_applylsn),
Lsn::from(pageserver_feedback.ps_applylsn),
acc.remote_consistent_lsn,
);
}
@@ -318,17 +296,12 @@ impl Timeline {
/// Register compute connection, starting timeline-related activity if it is
/// not running yet.
/// Can fail only if channel to a static thread got closed, which is not normal at all.
pub fn on_compute_connect(&self, pageserver_connstr: Option<&String>) -> Result<()> {
pub fn on_compute_connect(&self) -> Result<()> {
let is_wal_backup_action_pending: bool;
{
let mut shared_state = self.mutex.lock().unwrap();
shared_state.num_computes += 1;
is_wal_backup_action_pending = shared_state.update_status();
// FIXME: currently we always adopt latest pageserver connstr, but we
// should have kind of generations assigned by compute to distinguish
// the latest one or even pass it through consensus to reliably deliver
// to all safekeepers.
shared_state.activate_walsender(&self.zttid, pageserver_connstr.cloned());
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
}
// Wake up wal backup launcher, if offloading not started yet.
if is_wal_backup_action_pending {
@@ -345,7 +318,7 @@ impl Timeline {
{
let mut shared_state = self.mutex.lock().unwrap();
shared_state.num_computes -= 1;
is_wal_backup_action_pending = shared_state.update_status();
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
}
// Wake up wal backup launcher, if it is time to stop the offloading.
if is_wal_backup_action_pending {
@@ -364,7 +337,7 @@ impl Timeline {
(replica_state.remote_consistent_lsn != Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
replica_state.remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
if stop {
shared_state.deactivate_walsender(&self.zttid);
shared_state.update_status(self.zttid);
return Ok(true);
}
}
@@ -378,6 +351,12 @@ impl Timeline {
shared_state.wal_backup_attend()
}
// Can this safekeeper offload to s3? Recently joined safekeepers might not
// have necessary WAL.
pub fn can_wal_backup(&self) -> bool {
self.mutex.lock().unwrap().can_wal_backup()
}
/// Deactivates the timeline, assuming it is being deleted.
/// Returns whether the timeline was already active.
///
@@ -478,8 +457,8 @@ impl Timeline {
if let Some(AcceptorProposerMessage::AppendResponse(ref mut resp)) = rmsg {
let state = shared_state.get_replicas_state();
resp.hs_feedback = state.hs_feedback;
if let Some(zenith_feedback) = state.zenith_feedback {
resp.zenith_feedback = zenith_feedback;
if let Some(pageserver_feedback) = state.pageserver_feedback {
resp.pageserver_feedback = pageserver_feedback;
}
}
@@ -525,7 +504,6 @@ impl Timeline {
)),
peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
pageserver_connstr: shared_state.pageserver_connstr.clone(),
backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
})
}
@@ -547,7 +525,7 @@ impl Timeline {
}
shared_state.sk.record_safekeeper_info(sk_info)?;
self.notify_wal_senders(&mut shared_state);
is_wal_backup_action_pending = shared_state.update_status();
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
commit_lsn = shared_state.sk.inmem.commit_lsn;
}
self.commit_lsn_watch_tx.send(commit_lsn)?;

View File

@@ -1,4 +1,7 @@
use anyhow::{Context, Result};
use etcd_broker::subscription_key::{
NodeKind, OperationKind, SkOperationKind, SubscriptionKey, SubscriptionKind,
};
use tokio::task::JoinHandle;
use std::cmp::min;
@@ -26,8 +29,6 @@ use crate::{broker, SafeKeeperConf};
use once_cell::sync::OnceCell;
const BACKUP_ELECTION_NAME: &str = "WAL_BACKUP";
const BROKER_CONNECTION_RETRY_DELAY_MS: u64 = 1000;
const UPLOAD_FAILURE_RETRY_MIN_MS: u64 = 10;
@@ -48,14 +49,10 @@ pub fn wal_backup_launcher_thread_main(
});
}
/// Check whether wal backup is required for timeline and mark that launcher is
/// aware of current status (if timeline exists).
fn is_wal_backup_required(zttid: ZTenantTimelineId) -> bool {
if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
tli.wal_backup_attend()
} else {
false
}
/// Check whether wal backup is required for timeline. If yes, mark that launcher is
/// aware of current status and return the timeline.
fn is_wal_backup_required(zttid: ZTenantTimelineId) -> Option<Arc<Timeline>> {
GlobalTimelines::get_loaded(zttid).filter(|t| t.wal_backup_attend())
}
struct WalBackupTaskHandle {
@@ -63,6 +60,56 @@ struct WalBackupTaskHandle {
handle: JoinHandle<()>,
}
struct WalBackupTimelineEntry {
timeline: Arc<Timeline>,
handle: Option<WalBackupTaskHandle>,
}
/// Start per timeline task, if it makes sense for this safekeeper to offload.
fn consider_start_task(
conf: &SafeKeeperConf,
zttid: ZTenantTimelineId,
task: &mut WalBackupTimelineEntry,
) {
if !task.timeline.can_wal_backup() {
return;
}
info!("starting WAL backup task for {}", zttid);
// TODO: decide who should offload right here by simply checking current
// state instead of running elections in offloading task.
let election_name = SubscriptionKey {
cluster_prefix: conf.broker_etcd_prefix.clone(),
kind: SubscriptionKind::Operation(
zttid,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::WalBackup),
),
}
.watch_key();
let my_candidate_name = broker::get_candiate_name(conf.my_id);
let election = broker::Election::new(
election_name,
my_candidate_name,
conf.broker_endpoints.clone(),
);
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
let timeline_dir = conf.timeline_dir(&zttid);
let handle = tokio::spawn(
backup_task_main(zttid, timeline_dir, shutdown_rx, election)
.instrument(info_span!("WAL backup task", zttid = %zttid)),
);
task.handle = Some(WalBackupTaskHandle {
shutdown_tx,
handle,
});
}
const CHECK_TASKS_INTERVAL_MSEC: u64 = 1000;
/// Sits on wal_backup_launcher_rx and starts/stops per timeline wal backup
/// tasks. Having this in a separate task simplifies locking, allows reaping
/// panics, and separates elections from offloading itself.
@@ -71,7 +118,7 @@ async fn wal_backup_launcher_main_loop(
mut wal_backup_launcher_rx: Receiver<ZTenantTimelineId>,
) {
info!(
"WAL backup launcher: started, remote config {:?}",
"WAL backup launcher started, remote config {:?}",
conf.remote_storage
);
@@ -82,60 +129,50 @@ async fn wal_backup_launcher_main_loop(
})
});
let mut tasks: HashMap<ZTenantTimelineId, WalBackupTaskHandle> = HashMap::new();
// Presence in this map means the launcher is aware s3 offloading is needed for
// the timeline, but a task is started only if it makes sense to offload
// from this safekeeper.
let mut tasks: HashMap<ZTenantTimelineId, WalBackupTimelineEntry> = HashMap::new();
let mut ticker = tokio::time::interval(Duration::from_millis(CHECK_TASKS_INTERVAL_MSEC));
loop {
// channel is never expected to get closed
let zttid = wal_backup_launcher_rx.recv().await.unwrap();
let is_wal_backup_required = is_wal_backup_required(zttid);
if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
continue; /* just drain the channel and do nothing */
}
// do we need to do anything at all?
if is_wal_backup_required != tasks.contains_key(&zttid) {
if is_wal_backup_required {
// need to start the task
info!("starting WAL backup task for {}", zttid);
tokio::select! {
zttid = wal_backup_launcher_rx.recv() => {
// channel is never expected to get closed
let zttid = zttid.unwrap();
if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
continue; /* just drain the channel and do nothing */
}
let timeline = is_wal_backup_required(zttid);
// do we need to do anything at all?
if timeline.is_some() != tasks.contains_key(&zttid) {
if let Some(timeline) = timeline {
// need to start the task
let entry = tasks.entry(zttid).or_insert(WalBackupTimelineEntry {
timeline,
handle: None,
});
consider_start_task(&conf, zttid, entry);
} else {
// need to stop the task
info!("stopping WAL backup task for {}", zttid);
// TODO: decide who should offload in launcher itself by simply checking current state
let election_name = broker::get_campaign_name(
BACKUP_ELECTION_NAME.to_string(),
conf.broker_etcd_prefix.clone(),
&zttid,
);
let my_candidate_name = broker::get_candiate_name(conf.my_id);
let election = broker::Election::new(
election_name,
my_candidate_name,
conf.broker_endpoints.clone(),
);
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
let timeline_dir = conf.timeline_dir(&zttid);
let handle = tokio::spawn(
backup_task_main(zttid, timeline_dir, shutdown_rx, election)
.instrument(info_span!("WAL backup task", zttid = %zttid)),
);
tasks.insert(
zttid,
WalBackupTaskHandle {
shutdown_tx,
handle,
},
);
} else {
// need to stop the task
info!("stopping WAL backup task for {}", zttid);
let wb_handle = tasks.remove(&zttid).unwrap();
// Tell the task to shutdown. Error means task exited earlier, that's ok.
let _ = wb_handle.shutdown_tx.send(()).await;
// Await the task itself. TODO: restart panicked tasks earlier.
// Hm, why I can't await on reference to handle?
if let Err(e) = wb_handle.handle.await {
warn!("WAL backup task for {} panicked: {}", zttid, e);
let entry = tasks.remove(&zttid).unwrap();
if let Some(wb_handle) = entry.handle {
// Tell the task to shutdown. Error means task exited earlier, that's ok.
let _ = wb_handle.shutdown_tx.send(()).await;
// Await the task itself. TODO: restart panicked tasks earlier.
if let Err(e) = wb_handle.handle.await {
warn!("WAL backup task for {} panicked: {}", zttid, e);
}
}
}
}
}
// Start known tasks, if needed and possible.
_ = ticker.tick() => {
for (zttid, entry) in tasks.iter_mut().filter(|(_, entry)| entry.handle.is_none()) {
consider_start_task(&conf, *zttid, entry);
}
}
}
@@ -200,20 +237,11 @@ impl WalBackupTask {
loop {
let mut retry_attempt = 0u32;
if let Some(l) = self.leader.take() {
l.give_up().await;
}
info!("acquiring leadership");
match broker::get_leader(&self.election).await {
Ok(l) => {
self.leader = Some(l);
}
Err(e) => {
error!("error during leader election {:?}", e);
sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
continue;
}
if let Err(e) = broker::get_leader(&self.election, &mut self.leader).await {
error!("error during leader election {:?}", e);
sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
continue;
}
info!("acquired leadership");

View File

@@ -26,6 +26,7 @@ KEY_EXCLUDE_FIELDS = frozenset({
})
NEGATIVE_COLOR = 'negative'
POSITIVE_COLOR = 'positive'
EPS = 1e-6
@dataclass
@@ -120,7 +121,8 @@ def get_row_values(columns: List[str], run_result: SuitRun,
# this might happen when a new metric is added and there is no value for it in the previous run
# leave this here for now; TODO: add proper handling when this actually happens
raise ValueError(f'{column} not found in previous result')
ratio = float(value) / float(prev_value['value']) - 1
# adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero
ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1
ratio_display, color = format_ratio(ratio, current_value['report'])
row_values.append(RowValue(value, color, ratio_display))
return row_values
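
For context, a standalone sketch of the guarded ratio calculation (the `safe_ratio` helper name is illustrative, not part of the script):

```python
EPS = 1e-6

def safe_ratio(value: float, prev_value: float) -> float:
    # Add EPS to both terms so a zero previous value cannot raise ZeroDivisionError
    return (float(value) + EPS) / (float(prev_value) + EPS) - 1

assert safe_ratio(10.0, 10.0) == 0.0   # unchanged metric -> ratio 0
assert safe_ratio(0.0, 0.0) == 0.0     # both zero -> treated as unchanged
print(safe_ratio(10.0, 0.0))           # finite large ratio instead of a crash
```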

View File

@@ -1,14 +1,14 @@
## Zenith test runner
## Neon test runner
This directory contains integration tests.
Prerequisites:
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
- Zenith and Postgres binaries
- Neon and Postgres binaries
- See the root [README.md](/README.md) for build directions
- Tests can be run from the git tree; or see the environment variables
below to run from other directories.
- The zenith git repo, including the postgres submodule
- The neon git repo, including the postgres submodule
(for some tests, e.g. `pg_regress`)
- Some tests (involving storage node coordination) require etcd to be installed. Follow
[`the guide`](https://etcd.io/docs/v3.5/install/) to obtain it.
@@ -51,8 +51,8 @@ Useful environment variables:
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
`--pageserver-config-override=${value}` parameter values when zenith cli is invoked
`RUST_LOG`: logging configuration to pass into Zenith CLI
`--pageserver-config-override=${value}` parameter values when neon_local cli is invoked
`RUST_LOG`: logging configuration to pass into Neon CLI
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
`./scripts/pytest -s --log-cli-level=INFO ...`
@@ -65,32 +65,32 @@ Exit after the first test failure:
### Writing a test
Every test needs a Zenith Environment, or ZenithEnv to operate in. A Zenith Environment
Every test needs a Neon Environment, or NeonEnv to operate in. A Neon Environment
is like a little cloud-in-a-box, and consists of a Pageserver, 0-N Safekeepers, and
compute Postgres nodes. The connections between them can be configured to use JWT
authentication tokens, and some other configuration options can be tweaked too.
The easiest way to get access to a Zenith Environment is by using the `zenith_simple_env`
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
or make other destructive changes in that environment. Also don't assume that
there are no tenants or branches or data in the cluster. For convenience, there is a
branch called `empty`, though. The convention is to create a test-specific branch of
that and load any test data there, instead of the 'main' branch.
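
For example, a minimal sketch of that convention (the test name and table are illustrative):

```python
from fixtures.neon_fixtures import NeonEnv

def test_my_feature(neon_simple_env: NeonEnv):
    env = neon_simple_env
    # Branch off 'empty' so the shared environment stays untouched
    env.neon_cli.create_branch("test_my_feature", "empty")
    pg = env.postgres.create_start("test_my_feature")

    cur = pg.connect().cursor()
    cur.execute("CREATE TABLE t (x int)")
    cur.execute("INSERT INTO t SELECT generate_series(1, 100)")
    cur.execute("SELECT count(*) FROM t")
    assert cur.fetchone() == (100, )
```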
For more complicated cases, you can build a custom Zenith Environment, with the `zenith_env`
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
fixture:
```python
def test_foobar(zenith_env_builder: ZenithEnvBuilder):
def test_foobar(neon_env_builder: NeonEnvBuilder):
# Prescribe the environment.
# We want to have 3 safekeeper nodes, and use JWT authentication in the
# connections to the page server
zenith_env_builder.num_safekeepers = 3
zenith_env_builder.set_pageserver_auth(True)
neon_env_builder.num_safekeepers = 3
neon_env_builder.set_pageserver_auth(True)
# Now create the environment. This initializes the repository, and starts
# up the page server and the safekeepers
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
# Run the test
...

View File

@@ -3,18 +3,18 @@ from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
#
# Create ancestor branches off the main branch.
#
def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Override defaults, 1M gc_horizon and 4M checkpoint_distance.
# Extend compaction_period and gc_period to disable background compaction and gc.
tenant, _ = env.zenith_cli.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '10 m',
'gc_horizon': '1048576',
@@ -24,7 +24,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
'compaction_target_size': '4194304',
})
env.pageserver.safe_psql("failpoints flush-frozen=sleep(10000)")
env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)")
pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
branch0_cur = pg_branch0.connect().cursor()
@@ -48,7 +48,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 100k rows: {lsn_100}')
# Create branch1.
env.zenith_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
log.info("postgres is running on 'branch1' branch")
@@ -72,7 +72,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 200k rows: {lsn_200}')
# Create branch2.
env.zenith_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
log.info("postgres is running on 'branch2' branch")
branch2_cur = pg_branch2.connect().cursor()
@@ -110,15 +110,14 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
assert branch2_cur.fetchone() == (300000, )
def test_ancestor_branch_detach(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_ancestor_branch_detach(neon_simple_env: NeonEnv):
env = neon_simple_env
parent_timeline_id = env.zenith_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
env.zenith_cli.create_branch("test_ancestor_branch_detach_branch1",
"test_ancestor_branch_detach_parent")
env.neon_cli.create_branch("test_ancestor_branch_detach_branch1",
"test_ancestor_branch_detach_parent")
ps_http = env.pageserver.http_client()
with pytest.raises(ZenithPageserverApiException,
match="Failed to detach inmem tenant timeline"):
with pytest.raises(NeonPageserverApiException, match="Failed to detach inmem tenant timeline"):
ps_http.timeline_detach(env.initial_tenant, parent_timeline_id)

View File

@@ -1,14 +1,14 @@
from contextlib import closing
from typing import Iterator
from uuid import UUID, uuid4
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
from requests.exceptions import HTTPError
import pytest
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
ps = env.pageserver
@@ -25,8 +25,8 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
ps.safe_psql("set FOO", password=tenant_token)
ps.safe_psql("set FOO", password=management_token)
new_timeline_id = env.zenith_cli.create_branch('test_pageserver_auth',
tenant_id=env.initial_tenant)
new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth',
tenant_id=env.initial_tenant)
# tenant can create branches
tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
@@ -36,7 +36,7 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
ancestor_timeline_id=new_timeline_id)
# fail to create branch using token with different tenant_id
with pytest.raises(ZenithPageserverApiException,
with pytest.raises(NeonPageserverApiException,
match='Forbidden: Tenant id mismatch. Permission denied'):
invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
ancestor_timeline_id=new_timeline_id)
@@ -46,21 +46,21 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
# fail to create tenant using tenant token
with pytest.raises(
ZenithPageserverApiException,
NeonPageserverApiException,
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
):
tenant_http_client.tenant_create()
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
zenith_env_builder.pageserver_auth_enabled = True
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
neon_env_builder.auth_enabled = True
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
env.zenith_cli.create_branch(branch)
env.neon_cli.create_branch(branch)
pg = env.postgres.create_start(branch)
with closing(pg.connect()) as conn:

View File

@@ -1,15 +1,15 @@
from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.zenith_fixtures import PgProtocol, ZenithEnvBuilder
from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.zenith_fixtures import Postgres
from fixtures.neon_fixtures import Postgres
import threading
pytest_plugins = ("fixtures.zenith_fixtures")
pytest_plugins = ("fixtures.neon_fixtures")
@contextmanager
@@ -26,7 +26,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
log.info("checks started")
with pg_cur(pg) as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
res = cur.fetchone()
@@ -93,10 +93,10 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/1587")
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.zenith_cli.create_branch('test_backpressure')
env.neon_cli.create_branch('test_backpressure')
pg = env.postgres.create_start('test_backpressure',
config_lines=['max_replication_write_lag=30MB'])

View File

@@ -1,7 +1,7 @@
import pytest
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
@@ -9,9 +9,9 @@ from fixtures.log_helper import log
# Test error handling, if the 'basebackup' command fails in the middle
# of building the tar archive.
#
def test_basebackup_error(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_basebackup_error", "empty")
def test_basebackup_error(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_basebackup_error", "empty")
# Introduce failpoint
env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return")

View File

@@ -5,26 +5,26 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
#
# Create a couple of branches off the main branch, at a historical point in time.
#
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# Use safekeeper in this test to avoid a subtle race condition.
# Without safekeeper, walreceiver reconnection can get stuck
# because of an IO deadlock.
#
# See https://github.com/zenithdb/zenith/issues/1068
zenith_env_builder.num_safekeepers = 1
# See https://github.com/neondatabase/neon/issues/1068
neon_env_builder.num_safekeepers = 1
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind')
env.neon_cli.create_branch('test_branch_behind')
pgmain = env.postgres.create_start('test_branch_behind')
log.info("postgres is running on 'test_branch_behind' branch")
@@ -61,9 +61,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 200100 rows: {lsn_b}')
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind_hundred',
'test_branch_behind',
ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch('test_branch_behind_hundred',
'test_branch_behind',
ancestor_start_lsn=lsn_a)
# Insert many more rows. This generates enough WAL to fill a few segments.
main_cur.execute('''
@@ -78,9 +78,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 400100 rows: {lsn_c}')
# Branch at the point where only 200100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind_more',
'test_branch_behind',
ancestor_start_lsn=lsn_b)
env.neon_cli.create_branch('test_branch_behind_more',
'test_branch_behind',
ancestor_start_lsn=lsn_b)
pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
pg_more = env.postgres.create_start('test_branch_behind_more')
@@ -104,9 +104,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# Check bad lsn's for branching
# branch at segment boundary
env.zenith_cli.create_branch('test_branch_segment_boundary',
'test_branch_behind',
ancestor_start_lsn="0/3000000")
env.neon_cli.create_branch('test_branch_segment_boundary',
'test_branch_behind',
ancestor_start_lsn="0/3000000")
pg = env.postgres.create_start('test_branch_segment_boundary')
cur = pg.connect().cursor()
cur.execute('SELECT 1')
@@ -114,13 +114,13 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# branch at pre-initdb lsn
with pytest.raises(Exception, match="invalid branch start lsn"):
env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.zenith_cli.create_branch('test_branch_preinitdb',
'test_branch_behind',
ancestor_start_lsn="0/42")
env.neon_cli.create_branch('test_branch_preinitdb',
'test_branch_behind',
ancestor_start_lsn="0/42")
# check that we cannot create branch based on garbage collected data
with closing(env.pageserver.connect()) as psconn:
@@ -132,9 +132,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
with pytest.raises(Exception, match="invalid branch start lsn"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.zenith_cli.create_branch('test_branch_create_fail',
'test_branch_behind',
ancestor_start_lsn=gced_lsn)
env.neon_cli.create_branch('test_branch_create_fail',
'test_branch_behind',
ancestor_start_lsn=gced_lsn)
# check that after gc everything is still there
hundred_cur.execute('SELECT count(*) FROM foo')

View File

@@ -1,22 +1,22 @@
import pytest
import concurrent.futures
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv
from fixtures.log_helper import log
import os
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# One safekeeper is enough for this test.
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
tenant_timelines = []
for n in range(4):
tenant_id_uuid, timeline_id_uuid = env.zenith_cli.create_tenant()
tenant_id_uuid, timeline_id_uuid = env.neon_cli.create_tenant()
tenant_id = tenant_id_uuid.hex
timeline_id = timeline_id_uuid.hex
@@ -81,14 +81,14 @@ def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
log.info(f'compute startup failed as expected: {err}')
def test_create_multiple_timelines_parallel(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(env.zenith_cli.create_timeline,
executor.submit(env.neon_cli.create_timeline,
f"test-create-multiple-timelines-{i}",
tenant_id) for i in range(4)
]
@@ -96,20 +96,20 @@ def test_create_multiple_timelines_parallel(zenith_simple_env: ZenithEnv):
future.result()
def test_fix_broken_timelines_on_startup(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
# Introduce failpoint when creating a new timeline
env.pageserver.safe_psql(f"failpoints before-checkpoint-new-timeline=return")
with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
_ = env.zenith_cli.create_timeline("test_fix_broken_timelines", tenant_id)
_ = env.neon_cli.create_timeline("test_fix_broken_timelines", tenant_id)
# Restart the page server
env.zenith_cli.pageserver_stop(immediate=True)
env.zenith_cli.pageserver_start()
env.neon_cli.pageserver_stop(immediate=True)
env.neon_cli.pageserver_start()
# Check that the "broken" timeline is not loaded
timelines = env.zenith_cli.list_timelines(tenant_id)
timelines = env.neon_cli.list_timelines(tenant_id)
assert len(timelines) == 1

View File

@@ -3,16 +3,16 @@ import os
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test compute node start after clog truncation
#
def test_clog_truncate(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_clog_truncate', 'empty')
def test_clog_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_clog_truncate', 'empty')
# set aggressive autovacuum to make sure that truncation will happen
config = [
@@ -62,9 +62,9 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
env.zenith_cli.create_branch('test_clog_truncate_new',
'test_clog_truncate',
ancestor_start_lsn=lsn_after_truncation)
env.neon_cli.create_branch('test_clog_truncate_new',
'test_clog_truncate',
ancestor_start_lsn=lsn_after_truncation)
pg2 = env.postgres.create_start('test_clog_truncate_new')
log.info('postgres is running on test_clog_truncate_new branch')

View File

@@ -1,15 +1,15 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test starting Postgres with custom options
#
def test_config(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_config", "empty")
def test_config(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
# change config
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])

View File

@@ -2,16 +2,16 @@ import os
import pathlib
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
#
# Test CREATE DATABASE when there have been relmapper changes
#
def test_createdb(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_createdb', 'empty')
def test_createdb(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createdb', 'empty')
pg = env.postgres.create_start('test_createdb')
log.info("postgres is running on 'test_createdb' branch")
@@ -27,7 +27,7 @@ def test_createdb(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createdb2')
# Test that you can connect to the new database on both branches
@@ -40,16 +40,16 @@ def test_createdb(zenith_simple_env: ZenithEnv):
('foodb', ))
res = cur.fetchone()
# check that dbsize equals sum of all relation sizes, excluding shared ones
# This is how we define dbsize in zenith for now
# This is how we define dbsize in neon for now
assert res[0] == res[1]
#
# Test DROP DATABASE
#
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch('test_dropdb', 'empty')
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_dropdb', 'empty')
pg = env.postgres.create_start('test_dropdb')
log.info("postgres is running on 'test_dropdb' branch")
@@ -73,14 +73,14 @@ def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
lsn_after_drop = cur.fetchone()[0]
# Create two branches before and after database drop.
env.zenith_cli.create_branch('test_before_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_before_drop)
env.neon_cli.create_branch('test_before_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_before_drop)
pg_before = env.postgres.create_start('test_before_dropdb')
env.zenith_cli.create_branch('test_after_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_after_drop)
env.neon_cli.create_branch('test_after_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_after_drop)
pg_after = env.postgres.create_start('test_after_dropdb')
# Test that database exists on the branch before drop

View File

@@ -1,15 +1,15 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test CREATE USER to check shared catalog restore
#
def test_createuser(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_createuser', 'empty')
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createuser', 'empty')
pg = env.postgres.create_start('test_createuser')
log.info("postgres is running on 'test_createuser' branch")
@@ -24,7 +24,7 @@ def test_createuser(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createuser2')
# Test that you can connect to new branch as a new user

View File

@@ -1,7 +1,7 @@
import asyncio
import random
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
# Test configuration
@@ -27,7 +27,7 @@ async def update_table(pg: Postgres):
# Perform aggressive GC with 0 horizon
async def gc(env: ZenithEnv, timeline: str):
async def gc(env: NeonEnv, timeline: str):
psconn = await env.pageserver.connect_async()
while updates_performed < updates_to_perform:
@@ -35,7 +35,7 @@ async def gc(env: ZenithEnv, timeline: str):
# At the same time, run UPDATEs and GC
async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: str):
workers = []
for worker_id in range(num_connections):
workers.append(asyncio.create_task(update_table(pg)))
@@ -48,14 +48,14 @@ async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
#
# Aggressively force GC, while running queries.
#
# (repro for https://github.com/zenithdb/zenith/issues/1047)
# (repro for https://github.com/neondatabase/neon/issues/1047)
#
def test_gc_aggressive(zenith_env_builder: ZenithEnvBuilder):
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_gc_aggressive", "main")
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_gc_aggressive", "main")
pg = env.postgres.create_start('test_gc_aggressive')
log.info('postgres is running on test_gc_aggressive branch')
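For context on the update_and_gc helper above: the test runs several UPDATE workers and an aggressive GC loop concurrently with asyncio. A self-contained sketch of just that concurrency shape (the worker and GC bodies are placeholders; the real ones issue UPDATEs and pageserver GC calls as shown above):

import asyncio

async def update_worker(iterations: int) -> None:
    # Placeholder for update_table(pg): each worker does its share of UPDATEs.
    for _ in range(iterations):
        await asyncio.sleep(0)

async def gc_loop(stop: asyncio.Event) -> None:
    # Placeholder for gc(env, timeline): keep forcing GC until the updates finish.
    while not stop.is_set():
        await asyncio.sleep(0)

async def update_and_gc_sketch(num_connections: int = 10) -> None:
    stop = asyncio.Event()
    workers = [asyncio.create_task(update_worker(100)) for _ in range(num_connections)]
    gc_task = asyncio.create_task(gc_loop(stop))
    await asyncio.gather(*workers)   # same shape as update_and_gc() in the file above
    stop.set()
    await gc_task

asyncio.run(update_and_gc_sketch())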


@@ -4,7 +4,7 @@ import math
from uuid import UUID
import psycopg2.extras
import psycopg2.errors
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
import time
@@ -12,11 +12,11 @@ import time
#
# Test pageserver get_lsn_by_timestamp API
#
def test_lsn_mapping(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
new_timeline_id = env.zenith_cli.create_branch('test_lsn_mapping')
new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping')
pgmain = env.postgres.create_start("test_lsn_mapping")
log.info("postgres is running on 'test_lsn_mapping' branch")


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
@@ -8,9 +8,9 @@ from fixtures.log_helper import log
# it only checks next_multixact_id field in restored pg_control,
# since we don't have functions to check multixact internals.
#
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch('test_multixact', 'empty')
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_multixact', 'empty')
pg = env.postgres.create_start('test_multixact')
log.info("postgres is running on 'test_multixact' branch")
@@ -60,7 +60,7 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.zenith_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
pg_new = env.postgres.create_start('test_multixact_new')
log.info("postgres is running on 'test_multixact_new' branch")


@@ -1,12 +1,12 @@
import uuid
import requests
from fixtures.zenith_fixtures import DEFAULT_BRANCH_NAME, ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
from typing import cast
def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpClient,
env: ZenithEnv,
def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient,
env: NeonEnv,
initial_tenant: uuid.UUID):
"""
Compare timelines list returned by CLI and directly via API.
@@ -17,65 +17,65 @@ def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpCli
map(lambda t: cast(str, t['timeline_id']),
pageserver_http_client.timeline_list(initial_tenant)))
timelines_cli = env.zenith_cli.list_timelines()
assert timelines_cli == env.zenith_cli.list_timelines(initial_tenant)
timelines_cli = env.neon_cli.list_timelines()
assert timelines_cli == env.neon_cli.list_timelines(initial_tenant)
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
assert timelines_api == cli_timeline_ids
def test_cli_timeline_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_cli_timeline_list(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
# Initial sanity check
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_main')
main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main')
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_nested',
'test_cli_branch_list_main')
nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested',
'test_cli_branch_list_main')
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
timelines_cli = [timeline_id for (_, timeline_id) in env.zenith_cli.list_timelines()]
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
assert main_timeline_id.hex in timelines_cli
assert nested_timeline_id.hex in timelines_cli
def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClient, env: ZenithEnv):
def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
res = env.zenith_cli.list_tenants()
res = env.neon_cli.list_tenants()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants_api == tenants_cli
def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_cli_tenant_list(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
# Initial sanity check
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant1, _ = env.zenith_cli.create_tenant()
tenant1, _ = env.neon_cli.create_tenant()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant2, _ = env.zenith_cli.create_tenant()
tenant2, _ = env.neon_cli.create_tenant()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
res = env.zenith_cli.list_tenants()
res = env.neon_cli.list_tenants()
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert env.initial_tenant.hex in tenants
@@ -83,18 +83,18 @@ def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
assert tenant2.hex in tenants
def test_cli_tenant_create(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
timelines = env.zenith_cli.list_timelines(tenant_id)
def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)
# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
assert timelines[0][0] == DEFAULT_BRANCH_NAME
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Connect to sk port on v4 loopback
res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
@@ -108,17 +108,17 @@ def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
# assert res.ok
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Stop default ps/sk
env.zenith_cli.pageserver_stop()
env.zenith_cli.safekeeper_stop()
env.neon_cli.pageserver_stop()
env.neon_cli.safekeeper_stop()
# Default start
res = env.zenith_cli.raw_cli(["start"])
res = env.neon_cli.raw_cli(["start"])
res.check_returncode()
# Default stop
res = env.zenith_cli.raw_cli(["stop"])
res = env.neon_cli.raw_cli(["stop"])
res.check_returncode()


@@ -1,12 +1,12 @@
import time
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_next_xid(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')


@@ -1,9 +1,10 @@
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
import pytest
def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
tenant_id, timeline_id = env.zenith_cli.create_tenant()
def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start('main', tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = pg.safe_psql_many(queries=[
@@ -26,7 +27,8 @@ def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
pageserver_http.timeline_detach(tenant_id, timeline_id)
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
"""
Basic test:
* create new tenant with a timeline
@@ -40,8 +42,9 @@ def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
Repeat check for several tenants/timelines.
"""
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = num_safekeepers
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
for _ in range(3):
for _ in range(num_timelines):
check_tenant(env, pageserver_http)
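The @pytest.mark.parametrize decorator added above is what feeds num_timelines and num_safekeepers into the test. As a small, Neon-independent illustration, each tuple in the list becomes its own test case:

import pytest

# Collected as test_shape[3-1] and test_shape[5-3]; adding tuples adds cases.
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1), (5, 3)])
def test_shape(num_timelines: int, num_safekeepers: int):
    assert num_timelines > 0 and num_safekeepers > 0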


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
import psycopg2.extras
@@ -14,11 +14,11 @@ import psycopg2.extras
# just a hint that the page hasn't been modified since that LSN, and the page
# server should return the latest page version regardless of the LSN.
#
def test_old_request_lsn(zenith_env_builder: ZenithEnvBuilder):
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_old_request_lsn", "main")
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_old_request_lsn", "main")
pg = env.postgres.create_start('test_old_request_lsn')
log.info('postgres is running on test_old_request_lsn branch')


@@ -2,26 +2,26 @@ from typing import Optional
from uuid import uuid4, UUID
import pytest
from fixtures.utils import lsn_from_hex
from fixtures.zenith_fixtures import (
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
ZenithEnv,
ZenithEnvBuilder,
ZenithPageserverHttpClient,
ZenithPageserverApiException,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
NeonPageserverApiException,
wait_until,
)
# test that we cannot override node id
def test_pageserver_init_node_id(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init()
def test_pageserver_init_node_id(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init()
with pytest.raises(
Exception,
match="node id can only be set during pageserver init and cannot be overridden"):
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
client.check_status()
# check initial tenant is there
@@ -57,11 +57,11 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
assert local_timeline_details['timeline_state'] == 'Loaded'
def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
tenant_id, timeline_id = env.zenith_cli.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
empty_response = client.wal_receiver_get(tenant_id, timeline_id)
@@ -70,11 +70,11 @@ def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv
assert empty_response.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
tenant_id, timeline_id = env.zenith_cli.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
@@ -107,15 +107,15 @@ def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
check_client(client, env.initial_tenant)
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
management_token = env.auth_keys.generate_management_token()
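The hunk is cut off here; presumably the management token is then handed to an authenticated HTTP client. A hedged sketch of that step, assuming http_client() accepts an auth_token keyword the way the NeonPageserverHttpClient constructor used later in this diff does:

def check_authenticated_client_sketch(env):
    # Assumption: env.pageserver.http_client(auth_token=...) exists, mirroring
    # NeonPageserverHttpClient(port=..., auth_token=...) seen elsewhere in this diff.
    management_token = env.auth_keys.generate_management_token()
    client = env.pageserver.http_client(auth_token=management_token)
    client.check_status()   # same smoke checks as check_client() above
    client.tenant_list()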


@@ -1,15 +1,15 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
# Test safekeeper sync and pageserver catch up
# while initial compute node is down and pageserver is lagging behind safekeepers.
# Ensure that basebackup after restart of all components is correct
# and new compute node contains all data.
def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down')
# Make shared_buffers large to ensure we won't query pageserver while it is down.
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down',
config_lines=['shared_buffers=512MB'])


@@ -1,13 +1,13 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_pageserver_restart')
env.neon_cli.create_branch('test_pageserver_restart')
pg = env.postgres.create_start('test_pageserver_restart')
pg_conn = pg.connect()


@@ -1,6 +1,6 @@
from io import BytesIO
import asyncio
from fixtures.zenith_fixtures import ZenithEnv, Postgres
from fixtures.neon_fixtures import NeonEnv, Postgres
from fixtures.log_helper import log
@@ -38,9 +38,9 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
env = zenith_simple_env
env.zenith_cli.create_branch("test_parallel_copy", "empty")
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
env.neon_cli.create_branch("test_parallel_copy", "empty")
pg = env.postgres.create_start('test_parallel_copy')
log.info("postgres is running on 'test_parallel_copy' branch")


@@ -5,20 +5,20 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
#
# Check pitr_interval GC behavior.
# Insert some data, run GC and create a branch in the past.
#
def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
zenith_env_builder.num_safekeepers = 1
neon_env_builder.num_safekeepers = 1
# Set pitr interval such that we need to keep the data
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
pgmain = env.postgres.create_start('main')
log.info("postgres is running on 'main' branch")
@@ -62,7 +62,7 @@ def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.zenith_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
pg_hundred = env.postgres.create_start('test_pitr_gc_hundred')


@@ -1,12 +1,12 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from psycopg2.errors import UndefinedTable
from psycopg2.errors import IoError
pytest_plugins = ("fixtures.zenith_fixtures")
pytest_plugins = ("fixtures.neon_fixtures")
extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
@@ -14,9 +14,9 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
#
# Validation of reading different page versions
#
def test_read_validation(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_read_validation", "empty")
def test_read_validation(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation", "empty")
pg = env.postgres.create_start("test_read_validation")
log.info("postgres is running on 'test_read_validation' branch")
@@ -125,9 +125,9 @@ def test_read_validation(zenith_simple_env: ZenithEnv):
log.info("Caught an expected failure: {}".format(e))
def test_read_validation_neg(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_read_validation_neg", "empty")
def test_read_validation_neg(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation_neg", "empty")
pg = env.postgres.create_start("test_read_validation_neg")
log.info("postgres is running on 'test_read_validation_neg' branch")


@@ -1,6 +1,6 @@
import pytest
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
#
@@ -9,9 +9,9 @@ from fixtures.zenith_fixtures import ZenithEnv
# This is very similar to the 'test_branch_behind' test, but instead of
# creating branches, creates read-only nodes.
#
def test_readonly_node(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_readonly_node', 'empty')
def test_readonly_node(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_readonly_node', 'empty')
pgmain = env.postgres.create_start('test_readonly_node')
log.info("postgres is running on 'test_readonly_node' branch")


@@ -4,28 +4,28 @@ import psycopg2.extras
import json
from ast import Assert
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
#
# Test pageserver recovery after crash
#
def test_pageserver_recovery(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
# Override default checkpointer settings to run it more often
zenith_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
env = zenith_env_builder.init()
env = neon_env_builder.init()
# Check if failpoints are enabled. Otherwise the test doesn't make sense
f = env.zenith_cli.pageserver_enabled_features()
f = env.neon_cli.pageserver_enabled_features()
assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test"
zenith_env_builder.start()
neon_env_builder.start()
# Create a branch for us
env.zenith_cli.create_branch("test_pageserver_recovery", "main")
env.neon_cli.create_branch("test_pageserver_recovery", "main")
pg = env.postgres.create_start('test_pageserver_recovery')
log.info("postgres is running on 'test_pageserver_recovery' branch")
@@ -45,7 +45,8 @@ def test_pageserver_recovery(zenith_env_builder: ZenithEnvBuilder):
# Configure failpoints
pscur.execute(
"failpoints checkpoint-before-sync=sleep(2000);checkpoint-after-sync=exit")
"failpoints flush-frozen-before-sync=sleep(2000);checkpoint-after-sync=exit"
)
# Do some updates until pageserver is crashed
try:


@@ -6,7 +6,7 @@ from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.zenith_fixtures import ZenithEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, lsn_to_hex
import pytest
@@ -30,12 +30,15 @@ import pytest
#
# The tests are done for all types of remote storage pageserver supports.
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
# zenith_env_builder.rust_log_override = 'debug'
def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, storage_type: str):
# Use this test to check more realistic safekeeper ids: some etcd key parsing bugs were related to them,
# and this test needs the safekeepers to write data to the pageserver, so any such issue will be visible
neon_env_builder.safekeepers_id_start = 12
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
@@ -43,7 +46,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
data_secret = 'very secret secret'
##### First start, insert secret data and upload it to the remote storage
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')
client = env.pageserver.http_client()
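The storage_type parametrization above is repeated almost verbatim in test_tenants_many and test_wal_backup later in this diff. A small illustrative helper that captures the dispatch, using only the builder methods shown here:

from fixtures.neon_fixtures import NeonEnvBuilder

def enable_remote_storage_sketch(builder: NeonEnvBuilder, storage_type: str, bucket_name: str) -> None:
    # Illustrative helper, not part of the fixtures: same dispatch as the
    # parametrized tests in this diff.
    if storage_type == 'local_fs':
        builder.enable_local_fs_remote_storage()
    elif storage_type == 'mock_s3':
        builder.enable_s3_mock_remote_storage(bucket_name)
    else:
        raise RuntimeError(f'Unknown storage type: {storage_type}')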


@@ -1,7 +1,7 @@
import pytest
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
@@ -9,13 +9,13 @@ from fixtures.log_helper import log
# Test restarting and recreating a postgres instance
#
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
zenith_env_builder.pageserver_auth_enabled = True
def test_restart_compute(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
neon_env_builder.auth_enabled = True
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_restart_compute')
env.neon_cli.create_branch('test_restart_compute')
pg = env.postgres.create_start('test_restart_compute')
log.info("postgres is running on 'test_restart_compute' branch")


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
@@ -6,11 +6,11 @@ from fixtures.log_helper import log
#
# The pg_subxact SLRU is not preserved on restarts, and doesn't need to be
# maintained in the pageserver, so subtransactions are not very exciting for
# Zenith. They are included in the commit record though and updated in the
# Neon. They are included in the commit record though and updated in the
# CLOG.
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch("test_subxacts", "empty")
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_subxacts", "empty")
pg = env.postgres.create_start('test_subxacts')
log.info("postgres is running on 'test_subxacts' branch")


@@ -3,25 +3,25 @@ from contextlib import closing
import pytest
import psycopg2.extras
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
def test_tenant_config(zenith_env_builder: ZenithEnvBuilder):
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# set some non-default global config
zenith_env_builder.pageserver_config_override = '''
neon_env_builder.pageserver_config_override = '''
page_cache_size=444;
wait_lsn_timeout='111 s';
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
"""Test per tenant configuration"""
tenant, _ = env.zenith_cli.create_tenant(conf={
tenant, _ = env.neon_cli.create_tenant(conf={
'checkpoint_distance': '20000',
'gc_period': '30sec',
})
env.zenith_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
pg = env.postgres.create_start(
"test_tenant_conf",
"main",
@@ -66,11 +66,11 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
}.items())
# update the config and ensure that it has changed
env.zenith_cli.config_tenant(tenant_id=tenant,
conf={
'checkpoint_distance': '15000',
'gc_period': '80sec',
})
env.neon_cli.config_tenant(tenant_id=tenant,
conf={
'checkpoint_distance': '15000',
'gc_period': '80sec',
})
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur:
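To summarize the two configuration layers exercised by this test: pageserver_config_override carries global (and default per-tenant) settings as a TOML fragment, while create_tenant and config_tenant take per-tenant dicts that override those defaults. A hedged recap using only the calls shown in this hunk; the values are arbitrary:

def tenant_config_sketch(neon_env_builder):
    # Global defaults, injected into the pageserver config as TOML.
    neon_env_builder.pageserver_config_override = '''
page_cache_size=444;
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
    env = neon_env_builder.init_start()

    # Per-tenant overrides at creation time...
    tenant, _ = env.neon_cli.create_tenant(conf={'checkpoint_distance': '20000',
                                                 'gc_period': '30sec'})
    # ...and live reconfiguration afterwards.
    env.neon_cli.config_tenant(tenant_id=tenant, conf={'checkpoint_distance': '15000',
                                                       'gc_period': '80sec'})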


@@ -10,7 +10,7 @@ from typing import Optional
import signal
import pytest
from fixtures.zenith_fixtures import PgProtocol, PortDistributor, Postgres, ZenithEnvBuilder, Etcd, ZenithPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, zenith_binpath, pg_distrib_dir
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
from fixtures.utils import lsn_from_hex
@@ -26,7 +26,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
http_port: int,
broker: Optional[Etcd]):
"""
cannot use ZenithPageserver yet because it depends on zenith cli
cannot use NeonPageserver yet because it depends on neon cli
which currently lacks support for multiple pageservers
"""
cmd = [
@@ -106,21 +106,21 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
"needs to replace callmemaybe call with better idea how to migrate timelines between pageservers"
)
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
port_distributor: PortDistributor,
with_load: str):
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
tenant, _ = env.zenith_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
tenant, _ = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
log.info("tenant to relocate %s", tenant)
# attach does not download ancestor branches (should it?), just use root branch for now
env.zenith_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
env.neon_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
tenant_pg = env.postgres.create_start(branch_name='test_tenant_relocation',
node_name='test_tenant_relocation',
@@ -177,16 +177,16 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
new_pageserver_pg_port = port_distributor.get_port()
new_pageserver_http_port = port_distributor.get_port()
log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
new_pageserver_http = ZenithPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
with new_pageserver_helper(new_pageserver_dir,
pageserver_bin,
remote_storage_mock_path,
new_pageserver_pg_port,
new_pageserver_http_port,
zenith_env_builder.broker):
neon_env_builder.broker):
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
@@ -215,7 +215,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
tenant_pg.stop()
# rewrite zenith cli config to use new pageserver for basebackup to start new compute
# rewrite neon cli config to use new pageserver for basebackup to start new compute
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
@@ -258,7 +258,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
# bring old pageserver back for clean shutdown via zenith cli
# bring old pageserver back for clean shutdown via neon cli
# new pageserver will be shut down by the context manager
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"


@@ -2,27 +2,30 @@ from contextlib import closing
from datetime import datetime
import os
import pytest
import time
from uuid import UUID
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.metrics import parse_metrics
from fixtures.utils import lsn_to_hex
from fixtures.benchmark_fixture import MetricReport
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
neon_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
"""Tests tenants with and without wal acceptors"""
tenant_1, _ = env.zenith_cli.create_tenant()
tenant_2, _ = env.zenith_cli.create_tenant()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_1)
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_2)
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_1)
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start(
f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
@@ -44,15 +47,65 @@ def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeep
assert cur.fetchone() == (5000050000, )
def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
def test_tenant_threads(neon_env_builder, zenbenchmark):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
env = zenith_env_builder.init_start()
tenant_1, _ = env.zenith_cli.create_tenant()
tenant_2, _ = env.zenith_cli.create_tenant()
def get_num_threads() -> int:
metrics = env.pageserver.http_client().get_metrics()
parsed = parse_metrics(metrics)
threads = parsed.query_one("process_threads").value
return threads
timeline_1 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
timeline_2 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
threads_before = get_num_threads()
zenbenchmark.record("threads_before", threads_before, "", report=MetricReport.LOWER_IS_BETTER)
tenants = env.pageserver.http_client().tenant_list()
num_tenants = len(tenants)
num_active = len([t for t in tenants if t["state"] == "Active"])
zenbenchmark.record("tenants_before", num_tenants, "", report=MetricReport.LOWER_IS_BETTER)
zenbenchmark.record("active_before", num_active, "", report=MetricReport.LOWER_IS_BETTER)
for i in range(20):
print(f"creating tenant {i}")
name = f"test_tenant_threads_{i}"
tenant, _ = env.neon_cli.create_tenant()
timeline = env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
pg.safe_psql("select 1;")
pg.stop()
env.pageserver.http_client().timeline_detach(tenant, timeline)
remaining_timelines = [
UUID(r["timeline_id"])
for r in env.pageserver.http_client().timeline_list(tenant)
]
for t in remaining_timelines:
env.pageserver.http_client().timeline_detach(tenant, t)
time.sleep(5)
threads_after = get_num_threads()
zenbenchmark.record("threads_before", threads_after, "", report=MetricReport.LOWER_IS_BETTER)
tenants = env.pageserver.http_client().tenant_list()
num_tenants = len(tenants)
num_active = len([t for t in tenants if t["state"] == "Active"])
zenbenchmark.record("tenants_after", num_tenants, "", report=MetricReport.LOWER_IS_BETTER)
zenbenchmark.record("active_after", num_active, "", report=MetricReport.LOWER_IS_BETTER)
def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2)
@@ -72,7 +125,7 @@ def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str()
for name in collected_metrics:
basepath = os.path.join(zenith_env_builder.repo_dir, f'{name}.metrics')
basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics')
with open(basepath, 'w') as stdout_f:
print(collected_metrics[name], file=stdout_f, flush=True)


@@ -12,11 +12,11 @@ from uuid import UUID
import pytest
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import lsn_from_hex
async def tenant_workload(env: ZenithEnv, pg: Postgres):
async def tenant_workload(env: NeonEnv, pg: Postgres):
pageserver_conn = await env.pageserver.connect_async()
pg_conn = await pg.connect_async()
@@ -35,7 +35,7 @@ async def tenant_workload(env: ZenithEnv, pg: Postgres):
assert res == i * 1000
async def all_tenants_workload(env: ZenithEnv, tenants_pgs):
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
workers = []
for tenant, pg in tenants_pgs:
worker = tenant_workload(env, pg)
@@ -46,28 +46,28 @@ async def all_tenants_workload(env: ZenithEnv, tenants_pgs):
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_tenants_many(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
def test_tenants_many(neon_env_builder: NeonEnvBuilder, storage_type: str):
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
tenants_pgs = []
for i in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.zenith_cli.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
'checkpoint_distance': '5000000',
})
env.zenith_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
pg = env.postgres.create_start(
f'test_tenants_many',


@@ -1,15 +1,15 @@
from contextlib import closing
import psycopg2.extras
import psycopg2.errors
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, assert_local
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
from fixtures.log_helper import log
import time
def test_timeline_size(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
# Branch at the point where only 100 rows were inserted
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size', 'empty')
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
@@ -69,9 +69,9 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
time.sleep(polling_interval)
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
client = env.pageserver.http_client()
res = assert_local(client, env.initial_tenant, new_timeline_id)
@@ -86,7 +86,7 @@ def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
with closing(pgmain.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("CREATE TABLE foo (t text)")


@@ -1,15 +1,15 @@
import os
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test branching, when a transaction is in prepared state
#
def test_twophase(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_twophase", "empty")
def test_twophase(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_twophase", "empty")
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
log.info("postgres is running on 'test_twophase' branch")
@@ -55,7 +55,7 @@ def test_twophase(zenith_simple_env: ZenithEnv):
assert len(twophase_files) == 2
# Create a branch with the transaction in prepared state
env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")
env.neon_cli.create_branch("test_twophase_prepared", "test_twophase")
# Start compute on the new branch
pg2 = env.postgres.create_start(


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
@@ -6,10 +6,10 @@ from fixtures.log_helper import log
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
# HEAP_UPDATE record.
#
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_vm_bit_clear(neon_simple_env: NeonEnv):
env = neon_simple_env
env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
pg = env.postgres.create_start('test_vm_bit_clear')
log.info("postgres is running on 'test_vm_bit_clear' branch")
@@ -33,7 +33,7 @@ def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
# Branch at this point, to test that later
env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server


@@ -12,10 +12,11 @@ from contextlib import closing
from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.zenith_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
from fixtures.neon_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol
from fixtures.utils import get_dir_size, lsn_to_hex, mkdir_if_needed, lsn_from_hex
from fixtures.log_helper import log
from typing import List, Optional, Any
from uuid import uuid4
@dataclass
@@ -29,9 +30,9 @@ class TimelineMetrics:
# Run page server and multiple acceptors, and multiple compute nodes running
# against different timelines.
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_many_timelines(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
n_timelines = 3
@@ -39,15 +40,15 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
"test_safekeepers_many_timelines_{}".format(tlin) for tlin in range(n_timelines)
]
# pageserver, safekeeper operate timelines via their ids (can be represented in hex as 'ad50847381e248feaac9876cc71ae418')
# that's not really human readable, so the branch names are introduced in Zenith CLI.
# Zenith CLI stores its branch <-> timeline mapping in its internals,
# that's not really human readable, so the branch names are introduced in Neon CLI.
# Neon CLI stores its branch <-> timeline mapping in its internals,
# but we need this to collect metrics from other servers, related to the timeline.
branch_names_to_timeline_ids = {}
# start postgres on each timeline
pgs = []
for branch_name in branch_names:
new_timeline_id = env.zenith_cli.create_branch(branch_name)
new_timeline_id = env.neon_cli.create_branch(branch_name)
pgs.append(env.postgres.create_start(branch_name))
branch_names_to_timeline_ids[branch_name] = new_timeline_id
@@ -93,14 +94,14 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
# the compute node, which only happens after a consensus of safekeepers
# has confirmed the transaction. We assume majority consensus here.
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.flush_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.commit_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
timeline_metrics.append(m)
log.info(f"{message}: {timeline_metrics}")
return timeline_metrics
# TODO: https://github.com/zenithdb/zenith/issues/809
# TODO: https://github.com/neondatabase/neon/issues/809
# collect_metrics("before CREATE TABLE")
# Do everything in different loops to have actions on different timelines
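The two assertions above encode the majority rule: the pageserver's last_record_lsn is only considered durable once a strict majority of safekeepers report a flush (and commit) LSN at least that high. A tiny self-contained restatement of that predicate:

from typing import List

def reached_majority(last_record_lsn: int, sk_lsns: List[int], num_safekeepers: int) -> bool:
    # Same predicate as the assertions above, factored out: more than half of
    # the safekeepers must have caught up to last_record_lsn.
    return 2 * sum(last_record_lsn <= lsn for lsn in sk_lsns) > num_safekeepers

assert reached_majority(100, [90, 120, 130], 3)       # 2 of 3 -> majority
assert not reached_majority(100, [90, 95, 130], 3)    # 1 of 3 -> not enough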
@@ -168,15 +169,15 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
# times, with fault_probability chance of getting a wal acceptor down or up
# along the way. 2 of 3 are always alive, so the work keeps going.
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
def test_restarts(neon_env_builder: NeonEnvBuilder):
fault_probability = 0.01
n_inserts = 1000
n_acceptors = 3
zenith_env_builder.num_safekeepers = n_acceptors
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = n_acceptors
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_restarts')
env.neon_cli.create_branch('test_safekeepers_restarts')
pg = env.postgres.create_start('test_safekeepers_restarts')
# we rely upon autocommit after each statement
@@ -209,11 +210,11 @@ def delayed_safekeeper_start(wa):
# When majority of acceptors is offline, commits are expected to be frozen
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 2
env = zenith_env_builder.init_start()
def test_unavailability(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 2
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_unavailability')
env.neon_cli.create_branch('test_safekeepers_unavailability')
pg = env.postgres.create_start('test_safekeepers_unavailability')
# we rely upon autocommit after each statement
@@ -279,12 +280,12 @@ def stop_value():
# do inserts while concurrently getting up/down subsets of acceptors
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
def test_race_conditions(neon_env_builder: NeonEnvBuilder, stop_value):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_race_conditions')
env.neon_cli.create_branch('test_safekeepers_race_conditions')
pg = env.postgres.create_start('test_safekeepers_race_conditions')
# we rely upon autocommit after each statement
@@ -308,16 +309,16 @@ def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
# Test that safekeepers push their info to the broker and learn peer status from it
def test_broker(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
zenith_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
def test_broker(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
env.zenith_cli.create_branch("test_broker", "main")
env.neon_cli.create_branch("test_broker", "main")
pg = env.postgres.create_start('test_broker')
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -349,13 +350,15 @@ def test_broker(zenith_env_builder: ZenithEnvBuilder):
# Test that old WAL consumed by peers and pageserver is removed from safekeepers.
def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 2
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 2
# to advance remote_consistent_lsn
zenith_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
neon_env_builder.enable_local_fs_remote_storage()
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_wal_removal')
env.neon_cli.create_branch('test_safekeepers_wal_removal')
pg = env.postgres.create_start('test_safekeepers_wal_removal')
with closing(pg.connect()) as conn:
@@ -369,7 +372,10 @@ def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
# force checkpoint to advance remote_consistent_lsn
with closing(env.pageserver.connect()) as psconn:
pageserver_conn_options = {}
if auth_enabled:
pageserver_conn_options['password'] = env.auth_keys.generate_tenant_token(tenant_id)
with closing(env.pageserver.connect(**pageserver_conn_options)) as psconn:
with psconn.cursor() as pscur:
pscur.execute(f"checkpoint {tenant_id} {timeline_id}")
@@ -380,9 +386,29 @@ def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
]
assert all(os.path.exists(p) for p in first_segments)
http_cli = env.safekeepers[0].http_client()
if not auth_enabled:
http_cli = env.safekeepers[0].http_client()
else:
http_cli = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id))
http_cli_other = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
http_cli_noauth = env.safekeepers[0].http_client()
# Pretend WAL is offloaded to s3.
if auth_enabled:
old_backup_lsn = http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
assert 'FFFFFFFF/FEFFFFFF' != old_backup_lsn
for cli in [http_cli_other, http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
cli.record_safekeeper_info(tenant_id,
timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert old_backup_lsn == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
http_cli.record_safekeeper_info(tenant_id, timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert 'FFFFFFFF/FEFFFFFF' == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
# wait till first segment is removed on all safekeepers
started_at = time.time()
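The auth-enabled branch above follows a pattern that test_timeline_status and test_delete_force repeat later in this diff: one client with the correct tenant token, one with a token for an unrelated tenant, and one with no token, where the last two must be rejected. A hedged helper sketch built only from the client calls shown in this diff:

import pytest
from uuid import uuid4

def safekeeper_clients_sketch(env, safekeeper, tenant_id: str):
    # Returns (authorized, wrong-tenant, unauthenticated) HTTP clients,
    # mirroring the three-client setup used above.
    ok = safekeeper.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
    other = safekeeper.http_client(auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
    noauth = safekeeper.http_client()
    return ok, other, noauth

def expect_rejected_sketch(clients, tenant_id: str, timeline_id: str) -> None:
    # Both the wrong-tenant and the unauthenticated client should be refused.
    for cli in clients:
        with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
            cli.timeline_status(tenant_id, timeline_id)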
@@ -412,22 +438,22 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_wal_backup(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
zenith_env_builder.num_safekeepers = 3
def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
neon_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
zenith_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_wal_backup')
env.neon_cli.create_branch('test_safekeepers_wal_backup')
pg = env.postgres.create_start('test_safekeepers_wal_backup')
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -460,7 +486,7 @@ def test_wal_backup(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
class ProposerPostgres(PgProtocol):
"""Object for running postgres without ZenithEnv"""
"""Object for running postgres without NeonEnv"""
def __init__(self,
pgdata_dir: str,
pg_bin,
@@ -542,14 +568,14 @@ class ProposerPostgres(PgProtocol):
# insert wal in all safekeepers and run sync on proposer
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
port_distributor: PortDistributor):
# We don't really need the full environment for this test, just the
# safekeepers would be enough.
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
timeline_id = uuid.uuid4()
tenant_id = uuid.uuid4()
@@ -596,25 +622,42 @@ def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_timeline_status')
env.neon_cli.create_branch('test_timeline_status')
pg = env.postgres.create_start('test_timeline_status')
wa = env.safekeepers[0]
wa_http_cli = wa.http_client()
wa_http_cli.check_status()
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
if not auth_enabled:
wa_http_cli = wa.http_client()
wa_http_cli.check_status()
else:
wa_http_cli = wa.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
wa_http_cli.check_status()
wa_http_cli_bad = wa.http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
wa_http_cli_bad.check_status()
wa_http_cli_noauth = wa.http_client()
wa_http_cli_noauth.check_status()
# fetch something sensible from status
tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id)
epoch = tli_status.acceptor_epoch
timeline_start_lsn = tli_status.timeline_start_lsn
if auth_enabled:
for cli in [wa_http_cli_bad, wa_http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
cli.timeline_status(tenant_id, timeline_id)
pg.safe_psql("create table t(i int)")
# ensure epoch goes up after reboot
@@ -642,7 +685,7 @@ class SafekeeperEnv:
peer_port=self.port_distributor.get_port())
self.pg_bin = pg_bin
self.num_safekeepers = num_safekeepers
self.bin_safekeeper = os.path.join(str(zenith_binpath), 'safekeeper')
self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper')
self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None
self.postgres: Optional[ProposerPostgres] = None
self.tenant_id: Optional[uuid.UUID] = None
@@ -753,8 +796,8 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
assert res == 5050
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
def safekeepers_guc(env: ZenithEnv, sk_names: List[int]) -> str:
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names])
def execute_payload(pg: Postgres):
@@ -781,9 +824,9 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
except Exception as e:
log.info(f"Safekeeper {sk.id} status error: {e}")
zenith_env_builder.num_safekeepers = 4
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch('test_replace_safekeeper')
neon_env_builder.num_safekeepers = 4
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_replace_safekeeper')
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
@@ -792,7 +835,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
pg.start()
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -844,7 +887,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
# We have `wal_keep_size=0`, so postgres should trim WAL once it has been broadcast
# to all safekeepers. This test checks that the compute WAL can fit into a small number
# of WAL segments.
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
# used to calculate delta in collect_stats
last_lsn = .0
@@ -866,10 +909,10 @@ def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
def generate_wal(cur):
cur.execute("INSERT INTO t SELECT generate_series(1,300000), 'payload'")
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
env.neon_cli.create_branch('test_wal_deleted_after_broadcast')
# Adjust checkpoint config to prevent keeping old WAL segments
pg = env.postgres.create_start(
'test_wal_deleted_after_broadcast',
@@ -894,18 +937,20 @@ def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
assert wal_size_after_checkpoint < 16 * 2.5
def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 1
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# Create two tenants: one will be deleted, other should be preserved.
tenant_id = env.initial_tenant.hex
timeline_id_1 = env.zenith_cli.create_branch('br1').hex # Active, delete explicitly
timeline_id_2 = env.zenith_cli.create_branch('br2').hex # Inactive, delete explicitly
timeline_id_3 = env.zenith_cli.create_branch('br3').hex # Active, delete with the tenant
timeline_id_4 = env.zenith_cli.create_branch('br4').hex # Inactive, delete with the tenant
timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly
timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly
timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant
timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant
tenant_id_other_uuid, timeline_id_other_uuid = env.zenith_cli.create_tenant()
tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant()
tenant_id_other = tenant_id_other_uuid.hex
timeline_id_other = timeline_id_other_uuid.hex
@@ -921,7 +966,14 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
cur.execute('CREATE TABLE t(key int primary key)')
sk = env.safekeepers[0]
sk_data_dir = Path(sk.data_dir())
sk_http = sk.http_client()
if not auth_enabled:
sk_http = sk.http_client()
sk_http_other = sk_http
else:
sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
sk_http_other = sk.http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id_other))
sk_http_noauth = sk.http_client()
assert (sk_data_dir / tenant_id / timeline_id_1).is_dir()
assert (sk_data_dir / tenant_id / timeline_id_2).is_dir()
assert (sk_data_dir / tenant_id / timeline_id_3).is_dir()
@@ -961,6 +1013,15 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
assert (sk_data_dir / tenant_id / timeline_id_4).is_dir()
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
if auth_enabled:
# Ensure we cannot delete the other tenant
for sk_h in [sk_http, sk_http_noauth]:
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other)
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
assert sk_h.tenant_delete_force(tenant_id_other)
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
# Remove initial tenant's br2 (inactive)
assert sk_http.timeline_delete_force(tenant_id, timeline_id_2) == {
"dir_existed": True,
@@ -1001,7 +1062,7 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
# Ensure the other tenant still works
sk_http.timeline_status(tenant_id_other, timeline_id_other)
sk_http_other.timeline_status(tenant_id_other, timeline_id_other)
with closing(pg_other.connect()) as conn:
with conn.cursor() as cur:
cur.execute('INSERT INTO t (key) VALUES (123)')


@@ -4,7 +4,7 @@ import asyncpg
import random
import time
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, Safekeeper
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.log_helper import getLogger
from fixtures.utils import lsn_from_hex, lsn_to_hex
from typing import List
@@ -136,7 +136,7 @@ async def wait_for_lsn(safekeeper: Safekeeper,
# On each iteration, one acceptor is stopped, and the two others should allow
# background workers to execute transactions. In the end, the state should remain
# consistent.
async def run_restarts_under_load(env: ZenithEnv,
async def run_restarts_under_load(env: NeonEnv,
pg: Postgres,
acceptors: List[Safekeeper],
n_workers=10,
@@ -202,11 +202,11 @@ async def run_restarts_under_load(env: ZenithEnv,
# Restart acceptors one by one, while executing and validating bank transactions
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_restarts_under_load')
env.neon_cli.create_branch('test_safekeepers_restarts_under_load')
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
config_lines=['max_replication_write_lag=1MB'])
@@ -217,11 +217,11 @@ def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
# Restart acceptors one by one and test that everything is working as expected
# when checkpoints are triggered frequently by max_wal_size=32MB. Because we have
# wal_keep_size=0, there will be aggressive WAL segment recycling.
def test_restarts_frequent_checkpoints(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_restarts_frequent_checkpoints')
env.neon_cli.create_branch('test_restarts_frequent_checkpoints')
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
config_lines=[


@@ -1,26 +1,26 @@
import os
import subprocess
from fixtures.zenith_fixtures import (ZenithEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.neon_fixtures import (NeonEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.log_helper import log
def test_wal_restore(zenith_env_builder: ZenithEnvBuilder,
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir,
port_distributor: PortDistributor):
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_wal_restore")
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
pg = env.postgres.create_start('test_wal_restore')
pg.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
env.zenith_cli.pageserver_stop()
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = os.path.join(test_output_dir, 'pgsql.restored')
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:


@@ -1,16 +1,16 @@
import os
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, base_dir, pg_distrib_dir
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
# The isolation tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
def test_isolation(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_isolation", "empty")
env.neon_cli.create_branch("test_isolation", "empty")
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])


@@ -1,19 +1,19 @@
import os
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import (ZenithEnv,
check_restored_datadir_content,
base_dir,
pg_distrib_dir)
from fixtures.neon_fixtures import (NeonEnv,
check_restored_datadir_content,
base_dir,
pg_distrib_dir)
from fixtures.log_helper import log
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_zenith_regress", "empty")
env.neon_cli.create_branch("test_neon_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_zenith_regress')
pg = env.postgres.create_start('test_neon_regress')
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
@@ -22,9 +22,9 @@ def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, c
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
# This test runs zenith specific tests
# This test runs neon specific tests
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'test_runner/zenith_regress')
src_path = os.path.join(base_dir, 'test_runner/neon_regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
pg_regress = os.path.join(build_path, 'pg_regress')


@@ -1,16 +1,16 @@
import os
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
# The pg_regress tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin, capsys):
env = zenith_simple_env
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: str, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_pg_regress", "empty")
env.neon_cli.create_branch("test_pg_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_pg_regress')
pg.safe_psql('CREATE DATABASE regression')


@@ -1,6 +1,5 @@
pytest_plugins = (
"fixtures.zenith_fixtures",
"fixtures.benchmark_fixture",
"fixtures.compare_fixtures",
"fixtures.slow",
)
pytest_plugins = ("fixtures.neon_fixtures",
"fixtures.benchmark_fixture",
"fixtures.compare_fixtures",
"fixtures.slow",
"fixtures.pg_stats")


@@ -25,9 +25,9 @@ To use, declare the 'zenbenchmark' fixture in the test function. Run the
benchmark, and then record the result by calling zenbenchmark.record. For example:
import timeit
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
def test_mybench(zenith_simple_env: env, zenbenchmark):
def test_mybench(neon_simple_env: env, zenbenchmark):
# Initialize the test
...
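To make the skeleton above concrete, here is a hedged sketch of a complete benchmark test. The test body and metric name are hypothetical; the record(metric_name, value, unit, report) call shape is inferred from other call sites in this diff (e.g. record_pg_stats in compare_fixtures).

import timeit

from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnv


def test_mybench(neon_simple_env: NeonEnv, zenbenchmark: NeonBenchmarker):
    # hypothetical benchmark: time a trivial read-only query on a fresh branch
    env = neon_simple_env
    env.neon_cli.create_branch('test_mybench', 'empty')
    pg = env.postgres.create_start('test_mybench')

    elapsed = timeit.timeit(
        lambda: pg.safe_psql('select count(*) from generate_series(1, 100000)'),
        number=1)

    # record(metric_name, value, unit, report); the call shape follows other
    # call sites in this diff (e.g. record_pg_stats in compare_fixtures)
    zenbenchmark.record('query_duration', elapsed, 's', MetricReport.LOWER_IS_BETTER)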
@@ -142,7 +142,7 @@ class MetricReport(str, enum.Enum): # str is a hack to make it json serializabl
LOWER_IS_BETTER = 'lower_is_better'
class ZenithBenchmarker:
class NeonBenchmarker:
"""
An object for recording benchmark results. This is created for each test
function by the zenbenchmark fixture
@@ -163,7 +163,7 @@ class ZenithBenchmarker:
Record a benchmark result.
"""
# just to namespace the value
name = f"zenith_benchmarker_{metric_name}"
name = f"neon_benchmarker_{metric_name}"
self.property_recorder(
name,
{
@@ -289,12 +289,12 @@ class ZenithBenchmarker:
@pytest.fixture(scope="function")
def zenbenchmark(record_property) -> Iterator[ZenithBenchmarker]:
def zenbenchmark(record_property) -> Iterator[NeonBenchmarker]:
"""
This is a pytest fixture for benchmark tests. It provides functions for
recording measurements, and prints them out at the end.
"""
benchmarker = ZenithBenchmarker(record_property)
benchmarker = NeonBenchmarker(record_property)
yield benchmarker


@@ -1,18 +1,19 @@
import pytest
from contextlib import contextmanager
from abc import ABC, abstractmethod
from fixtures.pg_stats import PgStatTable
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, ZenithEnv
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
# Type-related stuff
from typing import Iterator
from typing import Dict, List
class PgCompare(ABC):
"""Common interface of all postgres implementations, useful for benchmarks.
This class is a helper class for the zenith_with_baseline fixture. See its documentation
This class is a helper class for the neon_with_baseline fixture. See its documentation
for more details.
"""
@property
@@ -26,7 +27,7 @@ class PgCompare(ABC):
pass
@property
def zenbenchmark(self) -> ZenithBenchmarker:
def zenbenchmark(self) -> NeonBenchmarker:
pass
@abstractmethod
@@ -51,20 +52,45 @@ class PgCompare(ABC):
def record_duration(self, out_name):
pass
@contextmanager
def record_pg_stats(self, pg_stats: List[PgStatTable]):
init_data = self._retrieve_pg_stats(pg_stats)
class ZenithCompare(PgCompare):
"""PgCompare interface for the zenith stack."""
yield
data = self._retrieve_pg_stats(pg_stats)
for k in set(init_data) & set(data):
self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER)
def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]:
results: Dict[str, int] = {}
with self.pg.connect().cursor() as cur:
for pg_stat in pg_stats:
cur.execute(pg_stat.query)
row = cur.fetchone()
assert len(row) == len(pg_stat.columns)
for col, val in zip(pg_stat.columns, row):
results[f"{pg_stat.table}.{col}"] = int(val)
return results
class NeonCompare(PgCompare):
"""PgCompare interface for the neon stack."""
def __init__(self,
zenbenchmark: ZenithBenchmarker,
zenith_simple_env: ZenithEnv,
zenbenchmark: NeonBenchmarker,
neon_simple_env: NeonEnv,
pg_bin: PgBin,
branch_name):
self.env = zenith_simple_env
self.env = neon_simple_env
self._zenbenchmark = zenbenchmark
self._pg_bin = pg_bin
# We only use one branch and one timeline
self.env.zenith_cli.create_branch(branch_name, 'empty')
self.env.neon_cli.create_branch(branch_name, 'empty')
self._pg = self.env.postgres.create_start(branch_name)
self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0]
@@ -221,9 +247,9 @@ class RemoteCompare(PgCompare):
@pytest.fixture(scope='function')
def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare:
branch_name = request.node.name
return ZenithCompare(zenbenchmark, zenith_simple_env, pg_bin, branch_name)
return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name)
@pytest.fixture(scope='function')
@@ -236,13 +262,13 @@ def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare:
return RemoteCompare(zenbenchmark, remote_pg)
@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
def zenith_with_baseline(request) -> PgCompare:
"""Parameterized fixture that helps compare zenith against vanilla postgres.
@pytest.fixture(params=["vanilla_compare", "neon_compare"], ids=["vanilla", "neon"])
def neon_with_baseline(request) -> PgCompare:
"""Parameterized fixture that helps compare neon against vanilla postgres.
A test that uses this fixture turns into a parameterized test that runs against:
1. A vanilla postgres instance
2. A simple zenith env (see zenith_simple_env)
2. A simple neon env (see neon_simple_env)
3. Possibly other postgres protocol implementations.
The main goal of this fixture is to make it easier for people to read and write
@@ -254,7 +280,7 @@ def zenith_with_baseline(request) -> PgCompare:
of that.
If a test requires some one-off special implementation-specific logic, use of
isinstance(zenith_with_baseline, ZenithCompare) is encouraged. Though if that
isinstance(neon_with_baseline, NeonCompare) is encouraged. Though if that
implementation-specific logic is widely useful across multiple tests, it might
make sense to add methods to the PgCompare class.
"""


@@ -29,7 +29,7 @@ from dataclasses import dataclass
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import make_dsn, parse_dsn
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing_extensions import Literal
import requests
@@ -81,7 +81,7 @@ def pytest_addoption(parser):
# These are set in pytest_configure()
base_dir = ""
zenith_binpath = ""
neon_binpath = ""
pg_distrib_dir = ""
top_output_dir = ""
@@ -100,7 +100,7 @@ def check_interferring_processes(config):
# result of the test.
# NOTE this shows as an internal pytest error, there might be a better way
raise Exception(
'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
'Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
)
@@ -146,25 +146,25 @@ def pytest_configure(config):
raise Exception('postgres not found at "{}"'.format(pg_distrib_dir))
if os.getenv("REMOTE_ENV"):
# we are in remote env and do not have zenith binaries locally
# we are in remote env and do not have neon binaries locally
# this is the case for benchmarks run on self-hosted runner
return
# Find the zenith binaries.
global zenith_binpath
env_zenith_bin = os.environ.get('ZENITH_BIN')
if env_zenith_bin:
zenith_binpath = env_zenith_bin
# Find the neon binaries.
global neon_binpath
env_neon_bin = os.environ.get('ZENITH_BIN')
if env_neon_bin:
neon_binpath = env_neon_bin
else:
zenith_binpath = os.path.join(base_dir, 'target/debug')
log.info(f'zenith_binpath is {zenith_binpath}')
if not os.path.exists(os.path.join(zenith_binpath, 'pageserver')):
raise Exception('zenith binaries not found at "{}"'.format(zenith_binpath))
neon_binpath = os.path.join(base_dir, 'target/debug')
log.info(f'neon_binpath is {neon_binpath}')
if not os.path.exists(os.path.join(neon_binpath, 'pageserver')):
raise Exception('neon binaries not found at "{}"'.format(neon_binpath))
def profiling_supported():
"""Return True if the pageserver was compiled with the 'profiling' feature
"""
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
res = subprocess.run([bin_pageserver, '--version'],
check=True,
universal_newlines=True,
@@ -223,7 +223,7 @@ def can_bind(host: str, port: int) -> bool:
# TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
# moment. If that changes, we should start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/zenithdb/zenith/issues/801
# See https://github.com/neondatabase/neon/issues/801
#sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
@@ -479,27 +479,30 @@ class RemoteStorageUsers(Flag):
SAFEKEEPER = auto()
class ZenithEnvBuilder:
class NeonEnvBuilder:
"""
Builder object to create a Zenith runtime environment
Builder object to create a Neon runtime environment
You should use the `zenith_env_builder` or `zenith_simple_env` pytest
fixture to create the ZenithEnv object. That way, the repository is
You should use the `neon_env_builder` or `neon_simple_env` pytest
fixture to create the NeonEnv object. That way, the repository is
created in the right directory, based on the test name, and it's properly
cleaned up after the test has finished.
"""
def __init__(self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
pageserver_auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
def __init__(
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
self.port_distributor = port_distributor
@@ -509,20 +512,21 @@ class ZenithEnvBuilder:
self.mock_s3_server = mock_s3_server
self.pageserver_config_override = pageserver_config_override
self.num_safekeepers = num_safekeepers
self.pageserver_auth_enabled = pageserver_auth_enabled
self.safekeepers_id_start = safekeepers_id_start
self.auth_enabled = auth_enabled
self.default_branch_name = default_branch_name
self.env: Optional[ZenithEnv] = None
self.env: Optional[NeonEnv] = None
def init(self) -> ZenithEnv:
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
assert self.env is None, "environment already initialized"
self.env = ZenithEnv(self)
self.env = NeonEnv(self)
return self.env
def start(self):
self.env.start()
def init_start(self) -> ZenithEnv:
def init_start(self) -> NeonEnv:
env = self.init()
self.start()
return env
@@ -571,12 +575,12 @@ class ZenithEnvBuilder:
self.env.pageserver.stop(immediate=True)
class ZenithEnv:
class NeonEnv:
"""
An object representing the Zenith runtime environment. It consists of
An object representing the Neon runtime environment. It consists of
the page server, 0-N safekeepers, and the compute nodes.
ZenithEnv contains functions for stopping/starting nodes in the
NeonEnv contains functions for stopping/starting nodes in the
environment, checking their status, creating tenants, connecting to the
nodes, creating and destroying compute nodes, etc. The page server and
the safekeepers are considered fixed in the environment, you cannot
@@ -584,7 +588,7 @@ class ZenithEnv:
likely change in the future, as we start supporting multiple page
servers and adding/removing safekeepers on the fly).
Some notable functions and fields in ZenithEnv:
Some notable functions and fields in NeonEnv:
postgres - A factory object for creating postgres compute nodes.
@@ -598,24 +602,24 @@ class ZenithEnv:
initial_tenant - tenant ID of the initial tenant created in the repository
zenith_cli - can be used to run the 'zenith' CLI tool
neon_cli - can be used to run the 'neon' CLI tool
create_tenant() - initializes a new tenant in the page server, returns
the tenant id
"""
def __init__(self, config: ZenithEnvBuilder):
def __init__(self, config: NeonEnvBuilder):
self.repo_dir = config.repo_dir
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.zenith_cli = ZenithCli(env=self)
self.neon_cli = NeonCli(env=self)
self.postgres = PostgresFactory(self)
self.safekeepers: List[Safekeeper] = []
self.broker = config.broker
self.remote_storage = config.remote_storage
self.remote_storage_users = config.remote_storage_users
# generate initial tenant ID here instead of letting 'zenith init' generate it,
# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
self.initial_tenant = uuid.uuid4()
@@ -635,7 +639,7 @@ class ZenithEnv:
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
pageserver_auth_type = "ZenithJWT" if config.pageserver_auth_enabled else "Trust"
pageserver_auth_type = "ZenithJWT" if config.auth_enabled else "Trust"
toml += textwrap.dedent(f"""
[pageserver]
@@ -645,10 +649,10 @@ class ZenithEnv:
auth_type = '{pageserver_auth_type}'
""")
# Create a corresponding ZenithPageserver object
self.pageserver = ZenithPageserver(self,
port=pageserver_port,
config_override=config.pageserver_config_override)
# Create a corresponding NeonPageserver object
self.pageserver = NeonPageserver(self,
port=pageserver_port,
config_override=config.pageserver_config_override)
# Create config and a Safekeeper object for each safekeeper
for i in range(1, config.num_safekeepers + 1):
@@ -656,13 +660,17 @@ class ZenithEnv:
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
id = i # assign ids sequentially
id = config.safekeepers_id_start + i # assign ids sequentially
toml += textwrap.dedent(f"""
[[safekeepers]]
id = {id}
pg_port = {port.pg}
http_port = {port.http}
sync = false # Disable fsyncs to make the tests go faster""")
if config.auth_enabled:
toml += textwrap.dedent(f"""
auth_enabled = true
""")
if bool(self.remote_storage_users
& RemoteStorageUsers.SAFEKEEPER) and self.remote_storage is not None:
toml += textwrap.dedent(f"""
@@ -672,7 +680,7 @@ class ZenithEnv:
self.safekeepers.append(safekeeper)
log.info(f"Config: {toml}")
self.zenith_cli.init(toml)
self.neon_cli.init(toml)
def start(self):
# Start up broker, pageserver and all safekeepers
@@ -697,10 +705,10 @@ class ZenithEnv:
def _shared_simple_env(request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[ZenithEnv]:
default_broker: Etcd) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `zenith_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `zenith_simple_env`.
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
"""
if os.environ.get('TEST_SHARED_FIXTURES') is None:
@@ -711,23 +719,23 @@ def _shared_simple_env(request: Any,
repo_dir = os.path.join(str(top_output_dir), "shared_repo")
shutil.rmtree(repo_dir, ignore_errors=True)
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
env.zenith_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
env.neon_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
yield env
@pytest.fixture(scope='function')
def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
Simple Zenith environment, with no authentication and no safekeepers.
Simple Neon environment, with no authentication and no safekeepers.
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
environment for all tests that use 'zenith_simple_env', keeping the
environment for all tests that use 'neon_simple_env', keeping the
page server and safekeepers running. Any compute nodes are stopped after
each test, however.
"""
@@ -737,17 +745,17 @@ def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
@pytest.fixture(scope='function')
def zenith_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[ZenithEnvBuilder]:
def neon_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Zenith environment for test.
Fixture to create a Neon environment for test.
To use, define 'zenith_env_builder' fixture in your test to get access to the
To use, define 'neon_env_builder' fixture in your test to get access to the
builder object. Set properties on it to describe the environment.
Finally, initialize and start up the environment by calling
zenith_env_builder.init_start().
neon_env_builder.init_start().
After the initialization, you can launch compute nodes by calling
the functions in the 'env.postgres' factory object, stop/start the
@@ -758,16 +766,16 @@ def zenith_env_builder(test_output_dir,
repo_dir = os.path.join(test_output_dir, "repo")
# Return the builder to the caller
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
yield builder
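The fixture docstring above describes the typical flow; the following hedged sketch (a hypothetical test that uses only builder fields and methods visible in this diff) shows that flow end to end.

from fixtures.neon_fixtures import NeonEnvBuilder


def test_builder_flow(neon_env_builder: NeonEnvBuilder):
    # describe the environment before starting it
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.auth_enabled = True

    env = neon_env_builder.init_start()

    # create a branch, launch a compute node on it, and run a query
    env.neon_cli.create_branch('test_builder_flow')
    pg = env.postgres.create_start('test_builder_flow')
    pg.safe_psql('select 1')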
class ZenithPageserverApiException(Exception):
class NeonPageserverApiException(Exception):
pass
class ZenithPageserverHttpClient(requests.Session):
class NeonPageserverHttpClient(requests.Session):
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
@@ -784,7 +792,7 @@ class ZenithPageserverHttpClient(requests.Session):
msg = res.json()['msg']
except:
msg = ''
raise ZenithPageserverApiException(msg) from e
raise NeonPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
@@ -891,12 +899,12 @@ TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline
re.MULTILINE)
class ZenithCli:
class NeonCli:
"""
A typed wrapper around the `zenith` CLI tool.
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
def __init__(self, env: ZenithEnv):
def __init__(self, env: NeonEnv):
self.env = env
pass
@@ -982,7 +990,7 @@ class ZenithCli:
created_timeline_id = matches.group('timeline_id')
if created_timeline_id is None:
raise Exception('could not find timeline id after `zenith timeline create` invocation')
raise Exception('could not find timeline id after `neon timeline create` invocation')
else:
return uuid.UUID(created_timeline_id)
@@ -1014,13 +1022,13 @@ class ZenithCli:
created_timeline_id = matches.group('timeline_id')
if created_timeline_id is None:
raise Exception('could not find timeline id after `zenith timeline create` invocation')
raise Exception('could not find timeline id after `neon timeline create` invocation')
else:
return uuid.UUID(created_timeline_id)
def list_timelines(self, tenant_id: Optional[uuid.UUID] = None) -> List[Tuple[str, str]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `zenith timeline list` CLI output.
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# (L) main [b49f7954224a0ad25cc0013ea107b54b]
@@ -1053,7 +1061,7 @@ class ZenithCli:
return res
def pageserver_enabled_features(self) -> Any:
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
args = [bin_pageserver, '--enabled-features']
log.info('Running command "{}"'.format(' '.join(args)))
@@ -1093,7 +1101,7 @@ class ZenithCli:
immediate=False) -> 'subprocess.CompletedProcess[str]':
args = ['safekeeper', 'stop']
if id is not None:
args.extend(str(id))
args.append(str(id))
if immediate:
args.extend(['-m', 'immediate'])
return self.raw_cli(args)
@@ -1173,27 +1181,27 @@ class ZenithCli:
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
"""
Run "zenith" with the specified arguments.
Run "neon" with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.zenith_cli.raw_cli(...)
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
"""
assert type(arguments) == list
bin_zenith = os.path.join(str(zenith_binpath), 'neon_local')
bin_neon = os.path.join(str(neon_binpath), 'neon_local')
args = [bin_zenith] + arguments
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.env.repo_dir}"')
env_vars = os.environ.copy()
env_vars['ZENITH_REPO_DIR'] = str(self.env.repo_dir)
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars['RUST_LOG'] = self.env.rust_log_override
@@ -1231,20 +1239,20 @@ class ZenithCli:
return res
class ZenithPageserver(PgProtocol):
class NeonPageserver(PgProtocol):
"""
An object representing a running pageserver.
Initializes the repository via `zenith init`.
Initializes the repository via `neon init`.
"""
def __init__(self, env: ZenithEnv, port: PageserverPort, config_override: Optional[str] = None):
def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None):
super().__init__(host='localhost', port=port.pg, user='cloud_admin')
self.env = env
self.running = False
self.service_port = port
self.config_override = config_override
def start(self, overrides=()) -> 'ZenithPageserver':
def start(self, overrides=()) -> 'NeonPageserver':
"""
Start the page server.
`overrides` allows adding extra config for this pageserver start.
@@ -1252,17 +1260,17 @@ class ZenithPageserver(PgProtocol):
"""
assert self.running == False
self.env.zenith_cli.pageserver_start(overrides=overrides)
self.env.neon_cli.pageserver_start(overrides=overrides)
self.running = True
return self
def stop(self, immediate=False) -> 'ZenithPageserver':
def stop(self, immediate=False) -> 'NeonPageserver':
"""
Stop the page server.
Returns self.
"""
if self.running:
self.env.zenith_cli.pageserver_stop(immediate)
self.env.neon_cli.pageserver_stop(immediate)
self.running = False
return self
@@ -1272,8 +1280,8 @@ class ZenithPageserver(PgProtocol):
def __exit__(self, exc_type, exc, tb):
self.stop(True)
def http_client(self, auth_token: Optional[str] = None) -> ZenithPageserverHttpClient:
return ZenithPageserverHttpClient(
def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
return NeonPageserverHttpClient(
port=self.service_port.http,
auth_token=auth_token,
)
@@ -1371,6 +1379,7 @@ class VanillaPostgres(PgProtocol):
self.pg_bin = pg_bin
self.running = False
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
"""Append lines into postgresql.conf file."""
@@ -1405,10 +1414,12 @@ class VanillaPostgres(PgProtocol):
@pytest.fixture(scope='function')
def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]:
def vanilla_pg(test_output_dir: str,
port_distributor: PortDistributor) -> Iterator[VanillaPostgres]:
pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
pg_bin = PgBin(test_output_dir)
with VanillaPostgres(pgdatadir, pg_bin, 5432) as vanilla_pg:
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
yield vanilla_pg
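Since vanilla_pg now takes its port from port_distributor instead of hard-coding 5432, callers that need the port can read it back from the server options, as the proxy fixture below does. A minimal hedged sketch with a hypothetical test name:

from fixtures.log_helper import log
from fixtures.neon_fixtures import VanillaPostgres


def test_vanilla_pg_port(vanilla_pg: VanillaPostgres):
    vanilla_pg.start()
    # the port is assigned by port_distributor rather than hard-coded to 5432
    port = vanilla_pg.default_options['port']
    log.info(f"vanilla postgres is listening on port {port}")
    assert vanilla_pg.safe_psql('select 1')[0][0] == 1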
@@ -1453,8 +1464,8 @@ def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
yield remote_pg
class ZenithProxy(PgProtocol):
def __init__(self, port: int):
class NeonProxy(PgProtocol):
def __init__(self, port: int, pg_port: int):
super().__init__(host="127.0.0.1",
user="proxy_user",
password="pytest2",
@@ -1463,18 +1474,20 @@ class ZenithProxy(PgProtocol):
self.http_port = 7001
self.host = "127.0.0.1"
self.port = port
self.pg_port = pg_port
self._popen: Optional[subprocess.Popen[bytes]] = None
def start_static(self, addr="127.0.0.1:5432") -> None:
def start(self) -> None:
assert self._popen is None
# Start proxy
bin_proxy = os.path.join(str(zenith_binpath), 'proxy')
bin_proxy = os.path.join(str(neon_binpath), 'proxy')
args = [bin_proxy]
args.extend(["--http", f"{self.host}:{self.http_port}"])
args.extend(["--proxy", f"{self.host}:{self.port}"])
args.extend(["--auth-backend", "postgres"])
args.extend(["--auth-endpoint", "postgres://proxy_auth:pytest1@localhost:5432/postgres"])
args.extend(
["--auth-endpoint", f"postgres://proxy_auth:pytest1@localhost:{self.pg_port}/postgres"])
self._popen = subprocess.Popen(args)
self._wait_until_ready()
@@ -1493,20 +1506,22 @@ class ZenithProxy(PgProtocol):
@pytest.fixture(scope='function')
def static_proxy(vanilla_pg) -> Iterator[ZenithProxy]:
"""Zenith proxy that routes directly to vanilla postgres."""
def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]:
"""Neon proxy that routes directly to vanilla postgres."""
vanilla_pg.start()
vanilla_pg.safe_psql("create user proxy_auth with password 'pytest1' superuser")
vanilla_pg.safe_psql("create user proxy_user with password 'pytest2'")
with ZenithProxy(4432) as proxy:
proxy.start_static()
port = port_distributor.get_port()
pg_port = vanilla_pg.default_options['port']
with NeonProxy(port, pg_port) as proxy:
proxy.start()
yield proxy
class Postgres(PgProtocol):
""" An object representing a running postgres daemon. """
def __init__(self, env: ZenithEnv, tenant_id: uuid.UUID, port: int):
def __init__(self, env: NeonEnv, tenant_id: uuid.UUID, port: int):
super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres')
self.env = env
self.running = False
@@ -1532,11 +1547,11 @@ class Postgres(PgProtocol):
config_lines = []
self.node_name = node_name or f'{branch_name}_pg_node'
self.env.zenith_cli.pg_create(branch_name,
node_name=self.node_name,
tenant_id=self.tenant_id,
lsn=lsn,
port=self.port)
self.env.neon_cli.pg_create(branch_name,
node_name=self.node_name,
tenant_id=self.tenant_id,
lsn=lsn,
port=self.port)
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
@@ -1560,9 +1575,9 @@ class Postgres(PgProtocol):
log.info(f"Starting postgres node {self.node_name}")
run_result = self.env.zenith_cli.pg_start(self.node_name,
tenant_id=self.tenant_id,
port=self.port)
run_result = self.env.neon_cli.pg_start(self.node_name,
tenant_id=self.tenant_id,
port=self.port)
self.running = True
log.info(f"stdout: {run_result.stdout}")
@@ -1630,7 +1645,7 @@ class Postgres(PgProtocol):
if self.running:
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id)
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id)
self.running = False
return self
@@ -1642,7 +1657,7 @@ class Postgres(PgProtocol):
"""
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, True)
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id, True)
self.node_name = None
self.running = False
@@ -1679,7 +1694,7 @@ class Postgres(PgProtocol):
class PostgresFactory:
""" An object representing multiple running postgres daemons. """
def __init__(self, env: ZenithEnv):
def __init__(self, env: NeonEnv):
self.env = env
self.num_instances = 0
self.instances: List[Postgres] = []
@@ -1750,15 +1765,14 @@ class SafekeeperPort:
@dataclass
class Safekeeper:
""" An object representing a running safekeeper daemon. """
env: ZenithEnv
env: NeonEnv
port: SafekeeperPort
id: int
auth_token: Optional[str] = None
running: bool = False
def start(self) -> 'Safekeeper':
assert self.running == False
self.env.zenith_cli.safekeeper_start(self.id)
self.env.neon_cli.safekeeper_start(self.id)
self.running = True
# wait for wal acceptor start by checking its status
started_at = time.time()
@@ -1778,7 +1792,7 @@ class Safekeeper:
def stop(self, immediate=False) -> 'Safekeeper':
log.info('Stopping safekeeper {}'.format(self.id))
self.env.zenith_cli.safekeeper_stop(self.id, immediate)
self.env.neon_cli.safekeeper_stop(self.id, immediate)
self.running = False
return self
@@ -1809,8 +1823,8 @@ class Safekeeper:
assert isinstance(res, dict)
return res
def http_client(self) -> SafekeeperHttpClient:
return SafekeeperHttpClient(port=self.port.http)
def http_client(self, auth_token: Optional[str] = None) -> SafekeeperHttpClient:
return SafekeeperHttpClient(port=self.port.http, auth_token=auth_token)
def data_dir(self) -> str:
return os.path.join(self.env.repo_dir, "safekeepers", f"sk{self.id}")
@@ -1834,9 +1848,15 @@ class SafekeeperMetrics:
class SafekeeperHttpClient(requests.Session):
def __init__(self, port: int):
HTTPError = requests.HTTPError
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
self.auth_token = auth_token
if auth_token is not None:
self.headers['Authorization'] = f'Bearer {auth_token}'
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
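For reference, a hedged sketch of how a test obtains authenticated and mis-authenticated clients, mirroring the pattern of test_timeline_status earlier in this diff. The helper name is hypothetical; http_client(auth_token=...) attaches the 'Authorization: Bearer <token>' header shown above.

import pytest
from uuid import uuid4

from fixtures.neon_fixtures import NeonEnv


def check_safekeeper_auth(env: NeonEnv, tenant_id: str, timeline_id: str):
    # hypothetical helper: the safekeeper HTTP API is gated by per-tenant JWTs
    sk = env.safekeepers[0]

    good = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
    good.timeline_status(tenant_id, timeline_id)  # token for this tenant: allowed

    bad = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
    with pytest.raises(bad.HTTPError, match='Forbidden|Unauthorized'):
        bad.timeline_status(tenant_id, timeline_id)  # token for another tenant: rejected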
@@ -1966,7 +1986,7 @@ def get_test_output_dir(request: Any) -> str:
# This is autouse, so the test output directory always gets created, even
# if a test doesn't put anything there. It also solves a problem with the
# zenith_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# creates the repo in the test output directory. But it cannot depend on
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
# it has 'session' scope and cannot access fixtures with 'function'
@@ -2044,7 +2064,7 @@ def list_files_to_compare(pgdata_dir: str):
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(test_output_dir: str, env: ZenithEnv, pg: Postgres):
def check_restored_datadir_content(test_output_dir: str, env: NeonEnv, pg: Postgres):
# Get the timeline ID. We need it for the 'basebackup' command
with closing(pg.connect()) as conn:
@@ -2134,7 +2154,7 @@ def wait_until(number_of_iterations: int, interval: int, func):
raise Exception("timed out while waiting for %s" % func) from last_exception
def assert_local(pageserver_http_client: ZenithPageserverHttpClient,
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2142,7 +2162,7 @@ def assert_local(pageserver_http_client: ZenithPageserverHttpClient,
return timeline_detail
def remote_consistent_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2158,7 +2178,7 @@ def remote_consistent_lsn(pageserver_http_client: ZenithPageserverHttpClient,
return lsn_from_hex(lsn_str)
def wait_for_upload(pageserver_http_client: ZenithPageserverHttpClient,
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):
@@ -2174,7 +2194,7 @@ def wait_for_upload(pageserver_http_client: ZenithPageserverHttpClient,
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
def last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2184,7 +2204,7 @@ def last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
return lsn_from_hex(lsn_str)
def wait_for_last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):


@@ -0,0 +1,52 @@
from typing import List
import pytest
class PgStatTable:
table: str
columns: List[str]
additional_query: str
def __init__(self, table: str, columns: List[str], filter_query: str = ""):
self.table = table
self.columns = columns
self.additional_query = filter_query
@property
def query(self) -> str:
return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}"
@pytest.fixture(scope='function')
def pg_stats_rw() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database",
["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_ro() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_wo() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_wal() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_wal",
["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"],
"")
]
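These fixtures are intended to be combined with PgCompare.record_pg_stats from fixtures/compare_fixtures.py shown earlier in this diff; below is a hedged sketch of such a test (hypothetical name and workload).

from typing import List

from fixtures.compare_fixtures import PgCompare
from fixtures.pg_stats import PgStatTable


def test_write_throughput(neon_with_baseline: PgCompare, pg_stats_wo: List[PgStatTable]):
    env = neon_with_baseline
    env.pg.safe_psql('create table t(i int)')

    # deltas of the selected pg_stat_database counters are recorded as benchmark metrics
    with env.record_pg_stats(pg_stats_wo):
        env.pg.safe_psql('insert into t select generate_series(1, 100000)')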

Some files were not shown because too many files have changed in this diff.