Compare commits


13 Commits

Author SHA1 Message Date
Conrad Ludgate
911048b2ff break e2e test 2024-10-05 11:30:19 +01:00
Arseny Sher
eae4470bb6 safekeeper: remove local WAL files ignoring peer_horizon_lsn. (#8900)
If a peer safekeeper needs a garbage-collected segment, it will now be fetched
from S3 using on-demand WAL download. This reduces the danger of running out of disk space when a safekeeper fails.
2024-10-04 19:07:39 +03:00
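A compact, hedged restatement of the rule introduced by the commit above (the real change is in the safekeeper diff near the end of this page); plain u64 stands in for the repo's Lsn type, and the function name is illustrative:

// Hedged sketch of the new GC horizon rule: keep WAL needed by the pageserver,
// by s3 offload, and from the last local commit_lsn onward. peer_horizon_lsn is
// intentionally no longer part of the minimum, because a peer that misses
// segments can fetch them from remote storage on demand.
fn calc_horizon(
    cfile_remote_consistent_lsn: u64, // consumed by pageserver
    cfile_backup_lsn: u64,            // offloaded to s3
    cfile_commit_lsn: u64,            // where WAL reading starts on safekeeper restart
    flush_lsn: u64,
    extra_horizon_lsn: Option<u64>,
) -> u64 {
    let mut horizon = cfile_remote_consistent_lsn
        .min(cfile_backup_lsn)
        .min(cfile_commit_lsn)
        .min(flush_lsn);
    if let Some(extra) = extra_horizon_lsn {
        horizon = horizon.min(extra);
    }
    horizon
}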
Ivan Efremov
2d248aea6f proxy: exclude triple logging of connect compute errors (#9277)
Fixes (#9020)
 - Use compute::COULD_NOT_CONNECT for the connection error message;
 - Eliminate logging for a single connection attempt;
 - Fix a typo.
2024-10-04 18:21:39 +03:00
Conrad Ludgate
6c05f89f7d proxy: add local-proxy to compute image (#8823)
1. Adds local-proxy to the compute image and vm spec
2. Updates local-proxy config processing, writing its PID to a file eagerly
3. Updates compute-ctl to understand the local-proxy compute spec and to
send SIGHUP to local-proxy via that PID file (see the sketch below).

closes https://github.com/neondatabase/cloud/issues/16867
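The SIGHUP delivery in item 3 is not shown in the diffs below. The following is a hedged sketch, assuming the PID file path from the vm spec (/etc/local_proxy/pid) and the nix crate that compute_ctl already imports; the helper name notify_local_proxy_reload is hypothetical, not the actual compute_ctl function:

use std::fs;

use anyhow::Context;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

// Hypothetical helper: read the PID that local_proxy wrote eagerly at startup
// and ask it to re-read its config file by sending SIGHUP.
fn notify_local_proxy_reload(pid_path: &str) -> anyhow::Result<()> {
    let pid_str = fs::read_to_string(pid_path)
        .with_context(|| format!("read local_proxy pid file {pid_path}"))?;
    let pid: i32 = pid_str.trim().parse().context("parse local_proxy pid")?;
    kill(Pid::from_raw(pid), Signal::SIGHUP).context("send SIGHUP to local_proxy")?;
    Ok(())
}

A caller would invoke notify_local_proxy_reload("/etc/local_proxy/pid") after writing the new config.json.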
2024-10-04 14:52:01 +00:00
Arseny Sher
db53f98725 neon walsender_hooks: take basebackup LSN directly. (#9263)
NeonWALReader needs to know the LSN before which WAL is not available
locally, that is, the basebackup LSN. Previously it was taken from
WalpropShmemState, but that's racy, as walproposer sets it there only
after a successful election. Get it directly with GetRedoStartLsn.

Should fix flakiness of
test_ondemand_wal_download_in_replication_slot_funcs etc.

ref #9201
2024-10-04 14:56:15 +01:00
Erik Grinaker
04a6222418 remote_storage: add head_object integration test (#9274) 2024-10-04 12:40:41 +01:00
Vlad Lazar
dcf7af5a16 storcon: do timeline creation on all attached location (#9237)
## Problem

Creation of a timeline during a reconciliation can lead to
unavailability if the user attempts to start a compute before the
storage controller has notified cplane of the cut-over.

## Summary of changes

Create timelines on all currently attached locations. For the latest
location, we still look at the database (as before). With this change,
we also look into the observed state to find *other* attached locations.

Related https://github.com/neondatabase/neon/issues/9144
2024-10-04 11:56:43 +01:00
Erik Grinaker
37158d0424 pageserver: use conditional GET for secondary tenant heatmaps (#9236)
## Problem

Secondary tenant heatmaps were always downloaded, even when they hadn't
changed. This can be avoided by using a conditional GET request passing
the `ETag` of the previous heatmap.

## Summary of changes

The `ETag` was already plumbed down into the heatmap downloader, and
just needed further plumbing into the remote storage backends.

* Add a `DownloadOpts` struct and pass it to
`RemoteStorage::download()`.
* Add an optional `DownloadOpts::etag` field, which uses a conditional
GET and returns `DownloadError::Unmodified` on match.
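
As a usage illustration (not part of the diff itself), here is a hedged sketch of how a caller might issue the conditional GET with the new `DownloadOpts::etag` field; it mirrors the secondary-heatmap caller shown further down, with illustrative names:

use remote_storage::{Download, DownloadError, DownloadOpts, Etag, GenericRemoteStorage, RemotePath};
use tokio_util::sync::CancellationToken;

// Returns Some(download) with fresh contents and a new ETag, or None if the object
// still matches prev_etag (the backend answered the If-None-Match with 304).
async fn download_if_modified(
    storage: &GenericRemoteStorage,
    path: &RemotePath,
    prev_etag: Option<Etag>,
    cancel: &CancellationToken,
) -> Result<Option<Download>, DownloadError> {
    let opts = DownloadOpts { etag: prev_etag };
    match storage.download(path, &opts, cancel).await {
        Ok(download) => Ok(Some(download)),
        Err(DownloadError::Unmodified) => Ok(None),
        Err(err) => Err(err),
    }
}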
2024-10-04 12:29:48 +02:00
Erik Grinaker
60fb840e1f Cargo.toml: enable sso for aws-config (#9261)
## Problem

The S3 tests couldn't use SSO authentication when running locally against S3.

## Summary of changes

Enable the `sso` feature of `aws-config`. Also run `cargo hakari generate`, which made some updates to `workspace_hack`.
2024-10-04 11:27:06 +01:00
Heikki Linnakangas
52232dd85c tests: Add a comment explaining the rules of NeonLocalCli wrappers (#9195) 2024-10-03 22:03:29 +03:00
Heikki Linnakangas
8ef0c38b23 tests: Rename NeonLocalCli functions to match the 'neon_local' commands (#9195)
This makes it more clear that the functions in NeonLocalCli are just
typed wrappers around the corresponding 'neon_local' commands.
2024-10-03 22:03:27 +03:00
Heikki Linnakangas
56bb1ac458 tests: Move NeonCli and friends to separate file (#9195)
In passing, rename it to NeonLocalCli, to reflect that the binary
is called 'neon_local'.

Add a wrapper for the 'timeline_import' command, eliminating the last
raw call to the raw_cli() function from tests, except for a few in
test_neon_cli.py which are about testing 'neon_local' itself. All
the other calls are now made through the strongly-typed wrapper
functions.
2024-10-03 22:03:25 +03:00
Heikki Linnakangas
19db9e9aad tests: Replace direct calls to neon_cli with wrappers in NeonEnv (#9195)
Add wrappers for a few commands that didn't have them before. Move the
logic to generate tenant and timeline IDs from NeonCli to the callers,
so that NeonCli is more purely just a type-safe wrapper around
'neon_local'.
2024-10-03 22:03:22 +03:00
119 changed files with 1921 additions and 1284 deletions

View File

@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }}
cancel-previous-e2e-tests:
needs: [ check-permissions, promote-images, tag ]
needs: [ check-permissions ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
@@ -518,7 +518,7 @@ jobs:
trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ]
needs: [ check-permissions, promote-images, tag ]
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit

Cargo.lock generated
View File

@@ -1265,6 +1265,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"cfg-if",
"chrono",
"clap",
@@ -7334,12 +7335,6 @@ version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"aws-config",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1",
"base64ct",
"bitflags 2.4.1",
@@ -7413,7 +7408,6 @@ dependencies = [
"tracing",
"tracing-core",
"url",
"uuid",
"zeroize",
"zstd",
"zstd-safe",

View File

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }

View File

@@ -1075,6 +1075,20 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=
#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
@@ -1213,6 +1227,10 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

View File

@@ -19,6 +19,10 @@ commands:
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -11,7 +11,7 @@ testing = []
[dependencies]
anyhow.workspace = true
# camino.workspace = true
camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true

View File

@@ -34,6 +34,7 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -886,6 +887,11 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
@@ -936,6 +942,19 @@ impl ComputeNode {
});
}
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}
// Write new config
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -1023,6 +1042,19 @@ impl ComputeNode {
});
}
if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}
info!(
"start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions

View File

@@ -264,72 +264,68 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => {
let spec = request.spec;
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
Ok(())
})
.await
.unwrap()?;
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
}
Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
}
}

View File

@@ -15,7 +15,7 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
// pub mod local_proxy;
pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;

View File

@@ -109,7 +109,6 @@ pub struct ComputeSpec {
/// Local Proxy configuration used for JWT authentication
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>,
}
@@ -283,7 +282,7 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
/// Configured the local-proxy application with the relevant JWKS and roles it should
/// Configured the local_proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec {

View File

@@ -14,7 +14,7 @@ use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
};
pub struct AzureBlobStorage {
@@ -259,6 +259,7 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() {
match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)),
}
@@ -484,11 +485,16 @@ impl RemoteStorage for AzureBlobStorage {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let builder = blob_client.get();
let mut builder = blob_client.get();
if let Some(ref etag) = opts.etag {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
}
self.download_for_builder(builder, cancel).await
}

View File

@@ -5,6 +5,8 @@ pub enum DownloadError {
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
@@ -24,6 +26,7 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -38,7 +41,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) | NotFound | Cancelled => true,
BadInput(_) | NotFound | Unmodified | Cancelled => true,
Timeout | Other(_) => false,
}
}

View File

@@ -161,6 +161,14 @@ pub struct Listing {
pub keys: Vec<ListingObject>,
}
/// Options for downloads. The default value is a plain GET.
#[derive(Default)]
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
@@ -245,6 +253,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
@@ -401,16 +410,18 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}
/// See [`RemoteStorage::download`]
pub async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await,
Self::LocalFs(s) => s.download(from, opts, cancel).await,
Self::AwsS3(s) => s.download(from, opts, cancel).await,
Self::AzureBlob(s) => s.download(from, opts, cancel).await,
Self::Unreliable(s) => s.download(from, opts, cancel).await,
}
}
@@ -572,7 +583,7 @@ impl GenericRemoteStorage {
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, cancel).await,
None => self.download(from, &DownloadOpts::default(), cancel).await,
}
}

View File

@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension;
use crate::{
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};
use super::{RemoteStorage, StorageMetadata};
@@ -494,11 +494,17 @@ impl RemoteStorage for LocalFs {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?;
let etag = mock_etag(&file_metadata);
if opts.etag.as_ref() == Some(&etag) {
return Err(DownloadError::Unmodified);
}
let source = ReaderStream::new(
fs::OpenOptions::new()
@@ -519,7 +525,6 @@ impl RemoteStorage for LocalFs {
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
@@ -692,7 +697,7 @@ mod fs_tests {
) -> anyhow::Result<String> {
let cancel = CancellationToken::new();
let download = storage
.download(remote_storage_path, &cancel)
.download(remote_storage_path, &DownloadOpts::default(), &cancel)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
@@ -773,8 +778,8 @@ mod fs_tests {
"We should upload and download the same contents"
);
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -1101,7 +1106,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body");
@@ -1112,7 +1123,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(shorter, read);
Ok(())
}
@@ -1145,7 +1162,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read);
Ok(())

View File

@@ -28,6 +28,7 @@ use aws_sdk_s3::{
Client,
};
use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
@@ -44,8 +45,8 @@ use crate::{
error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
@@ -67,6 +68,7 @@ pub struct S3Bucket {
struct GetObjectRequest {
bucket: String,
key: String,
etag: Option<String>,
range: Option<String>,
}
impl S3Bucket {
@@ -248,13 +250,18 @@ impl S3Bucket {
let started_at = start_measuring_requests(kind);
let get_object = self
let mut builder = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range)
.send();
.set_range(request.range);
if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}
let get_object = builder.send();
let get_object = tokio::select! {
res = get_object => res,
@@ -277,6 +284,20 @@ impl S3Bucket {
);
return Err(DownloadError::NotFound);
}
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
@@ -773,6 +794,7 @@ impl RemoteStorage for S3Bucket {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from`
@@ -781,6 +803,7 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: opts.etag.as_ref().map(|e| e.to_string()),
range: None,
},
cancel,
@@ -807,6 +830,7 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: None,
range,
},
cancel,

View File

@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken;
use crate::{
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata, TimeTravelError,
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
RemoteStorage, StorageMetadata, TimeTravelError,
};
pub struct UnreliableWrapper {
@@ -167,11 +167,12 @@ impl RemoteStorage for UnreliableWrapper {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner.download(from, cancel).await
self.inner.download(from, opts, cancel).await
}
async fn download_byte_range(

View File

@@ -1,8 +1,7 @@
use anyhow::Context;
use camino::Utf8Path;
use futures::StreamExt;
use remote_storage::ListingMode;
use remote_storage::RemotePath;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
@@ -284,7 +283,10 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?;
// Normal download request
let dl = ctx.client.download(&path, &cancel).await?;
let dl = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -337,6 +339,54 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(())
}
/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;
// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -364,7 +414,10 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?;
let dl = ctx.client.download(&path_dest, &cancel).await?;
let dl = ctx
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -376,3 +429,56 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
Ok(())
}
/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);
// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);
// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;
assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);
Ok(())
}

View File

@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
use remote_storage::{
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config,
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageConfig, RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
@@ -121,7 +121,8 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
// A little check to ensure that our clock is not too far off from the S3 clock
{
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let opts = DownloadOpts::default();
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
@@ -159,7 +160,12 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
let path2_recovered_t2 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content
@@ -170,7 +176,12 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
let path2_recovered_t1 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1
@@ -416,7 +427,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now();
let mut stream = ctx
.client
.download(&path, &cancel)
.download(&path, &DownloadOpts::default(), &cancel)
.await
.expect("download succeeds")
.download_stream;
@@ -491,7 +502,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{
let stream = ctx
.client
.download(&path, &cancel)
.download(&path, &DownloadOpts::default(), &cancel)
.await
.expect("download succeeds")
.download_stream;

View File

@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
@@ -153,7 +153,9 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let download = storage.download(src_path, cancel).await?;
let download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -204,7 +206,9 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let mut download = storage.download(src_path, cancel).await?;
let mut download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -344,7 +348,9 @@ async fn do_download_index_part(
let index_part_bytes = download_retry_forever(
|| async {
let download = storage.download(&remote_path, cancel).await?;
let download = storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let mut bytes = Vec::new();
@@ -526,10 +532,15 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
let download = match storage.download(&remote_path, cancel).await {
let download = match storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
storage.download(&remote_preserved_path, cancel).await?
storage
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
}
Err(other) => Err(other)?,
};

View File

@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument};
@@ -944,36 +944,34 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
};
backoff::retry(
|| async {
let download = self
let download = match self
.remote_storage
.download(&heatmap_path, cancel)
.download(&heatmap_path, &opts, cancel)
.await
.map_err(UpdateError::from)?;
{
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
SECONDARY_MODE.download_heatmap.inc();
if Some(&download.etag) == prev_etag {
Ok(HeatMapDownload::Unmodified)
} else {
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
}
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
},
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -984,6 +982,7 @@ impl<'a> TenantDownloader<'a> {
.await
.ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
}
/// Download heatmap layers that are not present on local disk, or update their

View File

@@ -191,13 +191,14 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
if (!wal_reader)
{
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
XLogRecPtr basebackupLsn = GetRedoStartLsn();
if (epochStartLsn == 0)
/* should never happen */
if (basebackupLsn == 0)
{
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
elog(ERROR, "unable to start walsender when basebackupLsn is 0");
}
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
}
xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen;

View File

@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr,
/// Path of the local proxy config file
#[clap(long, default_value = "./localproxy.json")]
#[clap(long, default_value = "./local_proxy.json")]
config_path: Utf8PathBuf,
/// Path of the local proxy PID file
#[clap(long, default_value = "./localproxy.pid")]
#[clap(long, default_value = "./local_proxy.pid")]
pid_path: Utf8PathBuf,
}

View File

@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn};
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
#[derive(Debug, Error)]
pub(crate) enum ConnectionError {

View File

@@ -1,5 +1,6 @@
use crate::{
auth::backend::ComputeCredentialKeys,
compute::COULD_NOT_CONNECT,
compute::{self, PostgresConnection},
config::RetryConfig,
console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
@@ -15,7 +16,7 @@ use crate::{
use async_trait::async_trait;
use pq_proto::StartupMessageParams;
use tokio::time;
use tracing::{error, info, warn};
use tracing::{debug, info, warn};
use super::retry::ShouldRetryWakeCompute;
@@ -116,7 +117,6 @@ where
node_info.set_keys(user_info.get_keys());
node_info.allow_self_signed_compute = allow_self_signed_compute;
// let mut node_info = credentials.get_node_info(ctx, user_info).await?;
mechanism.update_connect_config(&mut node_info.config);
let retry_type = RetryType::ConnectToCompute;
@@ -139,10 +139,10 @@ where
Err(e) => e,
};
error!(error = ?err, "could not connect to compute node");
debug!(error = ?err, COULD_NOT_CONNECT);
let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry.
// If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one.
if should_retry(&err, num_retries, connect_to_compute_retry_config) {
Metrics::get().proxy.retries_metric.observe(
@@ -191,7 +191,7 @@ where
}
Err(e) => {
if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node");
// Don't log an error here, caller will print the error
Metrics::get().proxy.retries_metric.observe(
RetriesMetricGroup {
outcome: ConnectOutcome::Failed,
@@ -202,7 +202,7 @@ where
return Err(e.into());
}
warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node");
warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);
}
};

View File

@@ -2,21 +2,29 @@ use utils::lsn::Lsn;
use crate::timeline_manager::StateSnapshot;
/// Get oldest LSN we still need to keep. We hold WAL till it is consumed
/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3
/// offloading.
/// While it is safe to use inmem values for determining horizon,
/// we use persistent to make possible normal states less surprising.
/// All segments covering LSNs before horizon_lsn can be removed.
/// Get oldest LSN we still need to keep.
///
/// We hold WAL till it is consumed by
/// 1) pageserver (remote_consistent_lsn)
/// 2) s3 offloading.
/// 3) Additionally we must store WAL since last local commit_lsn because
/// that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min;
let mut horizon_lsn = min(
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
let mut horizon_lsn = state.cfile_remote_consistent_lsn;
// we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
}

View File

@@ -47,7 +47,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn,
// persistent control file values
pub(crate) cfile_peer_horizon_lsn: Lsn,
pub(crate) cfile_commit_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn,
@@ -70,7 +70,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_peer_horizon_lsn: state.peer_horizon_lsn,
cfile_commit_lsn: state.commit_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(),

View File

@@ -526,6 +526,21 @@ pub(crate) enum ReconcileResultRequest {
Stop,
}
#[derive(Clone)]
struct MutationLocation {
node: Node,
generation: Generation,
}
#[derive(Clone)]
struct ShardMutationLocations {
latest: MutationLocation,
other: Vec<MutationLocation>,
}
#[derive(Default, Clone)]
struct TenantMutationLocations(BTreeMap<TenantShardId, ShardMutationLocations>);
impl Service {
pub fn get_config(&self) -> &Config {
&self.config
@@ -2987,38 +3002,83 @@ impl Service {
failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock");
self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
if targets.is_empty() {
if targets.0.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
};
let shard_zero = targets.remove(0);
let (shard_zero_tid, shard_zero_locations) =
targets.0.pop_first().expect("Must have at least one shard");
assert!(shard_zero_tid.is_shard_zero());
async fn create_one(
tenant_shard_id: TenantShardId,
node: Node,
locations: ShardMutationLocations,
jwt: Option<String>,
create_req: TimelineCreateRequest,
) -> Result<TimelineInfo, ApiError> {
let latest = locations.latest.node;
tracing::info!(
"Creating timeline on shard {}/{}, attached to node {node}",
"Creating timeline on shard {}/{}, attached to node {latest} in generation {:?}",
tenant_shard_id,
create_req.new_timeline_id,
locations.latest.generation
);
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref());
client
let client =
PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref());
let timeline_info = client
.timeline_create(tenant_shard_id, &create_req)
.await
.map_err(|e| passthrough_api_error(&node, e))
.map_err(|e| passthrough_api_error(&latest, e))?;
// We propagate timeline creations to all attached locations such that a compute
// for the new timeline is able to start regardless of the current state of the
// tenant shard reconciliation.
for location in locations.other {
tracing::info!(
"Creating timeline on shard {}/{}, stale attached to node {} in generation {:?}",
tenant_shard_id,
create_req.new_timeline_id,
location.node,
location.generation
);
let client = PageserverClient::new(
location.node.get_id(),
location.node.base_url(),
jwt.as_deref(),
);
let res = client
.timeline_create(tenant_shard_id, &create_req)
.await;
if let Err(e) = res {
match e {
mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _) => {
// Tenant might have been detached from the stale location,
// so ignore 404s.
},
_ => {
return Err(passthrough_api_error(&location.node, e));
}
}
}
}
Ok(timeline_info)
}
// Because the caller might not provide an explicit LSN, we must do the creation first on a single shard, and then
// use whatever LSN that shard picked when creating on subsequent shards. We arbitrarily use shard zero as the shard
// that will get the first creation request, and propagate the LSN to all the >0 shards.
let timeline_info = create_one(
shard_zero.0,
shard_zero.1,
shard_zero_tid,
shard_zero_locations,
self.config.jwt_token.clone(),
create_req.clone(),
)
@@ -3031,14 +3091,24 @@ impl Service {
}
// Create timeline on remaining shards with number >0
if !targets.is_empty() {
if !targets.0.is_empty() {
// If we had multiple shards, issue requests for the remainder now.
let jwt = &self.config.jwt_token;
self.tenant_for_shards(
targets.iter().map(|t| (t.0, t.1.clone())).collect(),
|tenant_shard_id: TenantShardId, node: Node| {
targets
.0
.iter()
.map(|t| (*t.0, t.1.latest.node.clone()))
.collect(),
|tenant_shard_id: TenantShardId, _node: Node| {
let create_req = create_req.clone();
Box::pin(create_one(tenant_shard_id, node, jwt.clone(), create_req))
let mutation_locations = targets.0.remove(&tenant_shard_id).unwrap();
Box::pin(create_one(
tenant_shard_id,
mutation_locations,
jwt.clone(),
create_req,
))
},
)
.await?;
@@ -3068,7 +3138,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.is_empty() {
if targets.0.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3099,8 +3169,9 @@ impl Service {
// no shard needs to go first/last; the operation should be idempotent
// TODO: it would be great to ensure that all shards return the same error
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let results = self
.tenant_for_shards(targets, |tenant_shard_id, node| {
.tenant_for_shards(locations, |tenant_shard_id, node| {
futures::FutureExt::boxed(config_one(
tenant_shard_id,
timeline_id,
@@ -3131,7 +3202,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.is_empty() {
if targets.0.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3179,8 +3250,9 @@ impl Service {
}
// no shard needs to go first/last; the operation should be idempotent
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let mut results = self
.tenant_for_shards(targets, |tenant_shard_id, node| {
.tenant_for_shards(locations, |tenant_shard_id, node| {
futures::FutureExt::boxed(detach_one(
tenant_shard_id,
timeline_id,
@@ -3227,7 +3299,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.is_empty() {
if targets.0.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3249,7 +3321,12 @@ impl Service {
}
// no shard needs to go first/last; the operation should be idempotent
self.tenant_for_shards(targets, |tenant_shard_id, node| {
let locations = targets
.0
.iter()
.map(|t| (*t.0, t.1.latest.node.clone()))
.collect();
self.tenant_for_shards(locations, |tenant_shard_id, node| {
futures::FutureExt::boxed(do_one(
tenant_shard_id,
timeline_id,
@@ -3344,11 +3421,11 @@ impl Service {
op: O,
) -> Result<R, ApiError>
where
O: FnOnce(Vec<(TenantShardId, Node)>) -> F,
O: FnOnce(TenantMutationLocations) -> F,
F: std::future::Future<Output = R>,
{
let target_gens = {
let mut targets = Vec::new();
let mutation_locations = {
let mut locations = TenantMutationLocations::default();
// Load the currently attached pageservers for the latest generation of each shard. This can
// run concurrently with reconciliations, and it is not guaranteed that the node we find here
@@ -3399,14 +3476,50 @@ impl Service {
.ok_or(ApiError::Conflict(format!(
"Raced with removal of node {node_id}"
)))?;
targets.push((tenant_shard_id, node.clone(), generation));
let generation = generation.expect("Checked above");
let tenant = locked.tenants.get(&tenant_shard_id);
// TODO(vlad): Abstract the logic that finds stale attached locations
// from observed state into a [`Service`] method.
let other_locations = match tenant {
Some(tenant) => {
let mut other = tenant.attached_locations();
let latest_location_index =
other.iter().position(|&l| l == (node.get_id(), generation));
if let Some(idx) = latest_location_index {
other.remove(idx);
}
other
}
None => Vec::default(),
};
let location = ShardMutationLocations {
latest: MutationLocation {
node: node.clone(),
generation,
},
other: other_locations
.into_iter()
.filter_map(|(node_id, generation)| {
let node = locked.nodes.get(&node_id)?;
Some(MutationLocation {
node: node.clone(),
generation,
})
})
.collect(),
};
locations.0.insert(tenant_shard_id, location);
}
targets
locations
};
let targets = target_gens.iter().map(|t| (t.0, t.1.clone())).collect();
let result = op(targets).await;
let result = op(mutation_locations.clone()).await;
// Post-check: are all the generations of all the shards the same as they were initially? This proves that
// our remote operation executed on the latest generation and is therefore persistent.
@@ -3422,9 +3535,10 @@ impl Service {
}| (tenant_shard_id, generation),
)
.collect::<Vec<_>>()
!= target_gens
!= mutation_locations
.0
.into_iter()
.map(|i| (i.0, i.2))
.map(|i| (i.0, Some(i.1.latest.generation)))
.collect::<Vec<_>>()
{
// We raced with something that incremented the generation, and therefore cannot be
@@ -3454,12 +3568,14 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
if targets.is_empty() {
if targets.0.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
}
let shard_zero = targets.remove(0);
let (shard_zero_tid, shard_zero_locations) = targets.0.pop_first().expect("Must have at least one shard");
assert!(shard_zero_tid.is_shard_zero());
async fn delete_one(
tenant_shard_id: TenantShardId,
@@ -3482,8 +3598,9 @@ impl Service {
})
}
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let statuses = self
.tenant_for_shards(targets, |tenant_shard_id: TenantShardId, node: Node| {
.tenant_for_shards(locations, |tenant_shard_id: TenantShardId, node: Node| {
Box::pin(delete_one(
tenant_shard_id,
timeline_id,
@@ -3501,9 +3618,9 @@ impl Service {
// Delete shard zero last: this is not strictly necessary, but since a caller's GET on a timeline will be routed
// to shard zero, it gives a more obvious behavior that a GET returns 404 once the deletion is done.
let shard_zero_status = delete_one(
shard_zero.0,
shard_zero_tid,
timeline_id,
shard_zero.1,
shard_zero_locations.latest.node,
self.config.jwt_token.clone(),
)
.await?;

View File

@@ -17,6 +17,7 @@ use crate::{
service::ReconcileResultRequest,
};
use futures::future::{self, Either};
use itertools::Itertools;
use pageserver_api::controller_api::{
AvailabilityZone, NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy,
};
@@ -1410,6 +1411,32 @@ impl TenantShard {
pub(crate) fn set_preferred_az(&mut self, preferred_az_id: AvailabilityZone) {
self.preferred_az_id = Some(preferred_az_id);
}
/// Returns all the nodes to which this tenant shard is attached according to the
/// observed state and the generations. Return vector is sorted from latest generation
/// to earliest.
pub(crate) fn attached_locations(&self) -> Vec<(NodeId, Generation)> {
self.observed
.locations
.iter()
.filter_map(|(node_id, observed)| {
use LocationConfigMode::{AttachedMulti, AttachedSingle, AttachedStale};
let conf = observed.conf.as_ref()?;
match (conf.generation, conf.mode) {
(Some(gen), AttachedMulti | AttachedSingle | AttachedStale) => {
Some((*node_id, gen))
}
_ => None,
}
})
.sorted_by(|(_lhs_node_id, lhs_gen), (_rhs_node_id, rhs_gen)| {
lhs_gen.cmp(rhs_gen).reverse()
})
.map(|(node_id, gen)| (node_id, Generation::new(gen)))
.collect()
}
}
#[cfg(test)]

View File

@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
aws-config.workspace = true
aws-sdk-s3.workspace = true
either.workspace = true
anyhow.workspace = true
@@ -31,7 +32,6 @@ storage_controller_client.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] }
pageserver = { path = "../pageserver" }
pageserver_api = { path = "../libs/pageserver_api" }

View File

@@ -28,8 +28,9 @@ use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_time
use pageserver::tenant::TENANTS_SEGMENT_NAME;
use pageserver_api::shard::TenantShardId;
use remote_storage::{
GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind,
S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
use reqwest::Url;
use serde::{Deserialize, Serialize};
@@ -488,7 +489,10 @@ async fn download_object_with_retries(
let cancel = CancellationToken::new();
for trial in 0..MAX_RETRIES {
let mut buf = Vec::new();
let download = match remote_client.download(key, &cancel).await {
let download = match remote_client
.download(key, &DownloadOpts::default(), &cancel)
.await
{
Ok(response) => response,
Err(e) => {
error!("Failed to download object for key {key}: {e}");

View File

@@ -0,0 +1,662 @@
from __future__ import annotations
import abc
import json
import os
import re
import subprocess
import tempfile
import textwrap
from itertools import chain, product
from pathlib import Path
from typing import (
Any,
Dict,
List,
Optional,
Tuple,
TypeVar,
cast,
)
import toml
from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.pageserver.common_types import IndexPartDump
from fixtures.pg_version import PgVersion
from fixtures.utils import AuxFileStore
T = TypeVar("T")
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
Supports a way to run an arbitrary command directly via the CLI.
Do not use directly; use specific subclasses instead.
"""
def __init__(self, extra_env: Optional[Dict[str, str]], binpath: Path):
self.extra_env = extra_env
self.binpath = binpath
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
def raw_cli(
self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True,
timeout=None,
) -> "subprocess.CompletedProcess[str]":
"""
Run the command with the specified arguments.
Arguments must be in list form, e.g. ['endpoint', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
If `check_return_code`, on non-zero exit code logs failure and raises.
"""
assert isinstance(arguments, list)
assert isinstance(self.COMMAND, str)
command_path = str(self.binpath / self.COMMAND)
args = [command_path] + arguments
log.info('Running command "{}"'.format(" ".join(args)))
env_vars = os.environ.copy()
# extra env
for extra_env_key, extra_env_value in (self.extra_env or {}).items():
env_vars[extra_env_key] = extra_env_value
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass through coverage settings
var = "LLVM_PROFILE_FILE"
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(
args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=timeout,
)
except subprocess.TimeoutExpired as e:
if e.stderr:
stderr = e.stderr.decode(errors="replace")
else:
stderr = ""
if e.stdout:
stdout = e.stdout.decode(errors="replace")
else:
stdout = ""
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
raise
indent = " "
if not res.returncode:
stripped = res.stdout.strip()
lines = stripped.splitlines()
if len(lines) < 2:
log.debug(f"Run {res.args} success: {stripped}")
else:
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
elif check_return_code:
# this way command output will be recorded and shown in the CI failure message
indent = indent * 2
msg = textwrap.dedent(
"""\
Run %s failed:
stdout:
%s
stderr:
%s
"""
)
msg = msg % (
res.args,
textwrap.indent(res.stdout.strip(), indent),
textwrap.indent(res.stderr.strip(), indent),
)
log.info(msg)
raise RuntimeError(msg) from subprocess.CalledProcessError(
res.returncode, res.args, res.stdout, res.stderr
)
return res
class NeonLocalCli(AbstractNeonCli):
"""A typed wrapper around the `neon_local` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
Note: The methods in this class are supposed to be faithful wrappers of the underlying
'neon_local' commands. If you're tempted to add any logic here, please consider putting it
in the caller instead!
There are a few exceptions where these wrapper methods intentionally differ from the
underlying commands, however:
- Many 'neon_local' commands take an optional 'tenant_id' argument and use the default from
the config file if it's omitted. The corresponding wrappers require an explicit 'tenant_id'
argument. The idea is that we don't want to rely on the config file's default in tests,
because NeonEnv has its own 'initial_tenant'. They are currently always the same, but we
want to rely on the NeonEnv's default instead of the config file's default in tests.
- Similarly, --pg_version argument is always required in the wrappers, even when it's
optional in the 'neon_local' command. The default in 'neon_local' is a specific
hardcoded version, but in tests we never want to accidentally rely on that; we
always want to use the version from the test fixtures.
- Wrappers for commands that create a new tenant or timeline ID require the new tenant
or timeline ID to be passed by the caller, while the 'neon_local' commands will
generate a random ID if it's not specified. This is because we don't want to have to
parse the ID from the 'neon_local' output. Making it required ensures that the
caller has to generate it.
"""
COMMAND = "neon_local"
def __init__(
self,
extra_env: Optional[Dict[str, str]],
binpath: Path,
repo_dir: Path,
pg_distrib_dir: Path,
):
if extra_env is None:
env_vars = {}
else:
env_vars = extra_env.copy()
env_vars["NEON_REPO_DIR"] = str(repo_dir)
env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir)
super().__init__(env_vars, binpath)
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
return super().raw_cli(*args, **kwargs)
def tenant_create(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
pg_version: PgVersion,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
):
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
args = [
"tenant",
"create",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_policy is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
elif aux_file_policy is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
elif aux_file_policy is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
if set_default:
args.append("--set-default")
if shard_count is not None:
args.extend(["--shard-count", str(shard_count)])
if shard_stripe_size is not None:
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
if placement_policy is not None:
args.extend(["--placement-policy", str(placement_policy)])
res = self.raw_cli(args)
res.check_returncode()
def tenant_import(self, tenant_id: TenantId):
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
res = self.raw_cli(args)
res.check_returncode()
def tenant_set_default(self, tenant_id: TenantId):
"""
Update default tenant for future operations that require tenant_id.
"""
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
res.check_returncode()
def tenant_config(self, tenant_id: TenantId, conf: Dict[str, str]):
"""
Update tenant config.
"""
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
res = self.raw_cli(args)
res.check_returncode()
def tenant_list(self) -> "subprocess.CompletedProcess[str]":
res = self.raw_cli(["tenant", "list"])
res.check_returncode()
return res
def timeline_create(
self,
new_branch_name: str,
tenant_id: TenantId,
timeline_id: TimelineId,
pg_version: PgVersion,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
"--branch-name",
new_branch_name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
]
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
def timeline_branch(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
new_branch_name,
ancestor_branch_name: Optional[str] = None,
ancestor_start_lsn: Optional[Lsn] = None,
):
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(timeline_id),
"--tenant-id",
str(tenant_id),
]
if ancestor_branch_name is not None:
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
if ancestor_start_lsn is not None:
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
res = self.raw_cli(cmd)
res.check_returncode()
def timeline_import(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
new_branch_name: str,
base_lsn: Lsn,
base_tarfile: Path,
pg_version: PgVersion,
end_lsn: Optional[Lsn] = None,
wal_tarfile: Optional[Path] = None,
):
cmd = [
"timeline",
"import",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
"--branch-name",
new_branch_name,
"--base-lsn",
str(base_lsn),
"--base-tarfile",
str(base_tarfile),
]
if end_lsn is not None:
cmd.extend(["--end-lsn", str(end_lsn)])
if wal_tarfile is not None:
cmd.extend(["--wal-tarfile", str(wal_tarfile)])
res = self.raw_cli(cmd)
res.check_returncode()
def timeline_list(self, tenant_id: TenantId) -> List[Tuple[str, TimelineId]]:
"""
Returns a list of (branch_name, timeline_id) tuples parsed from the `neon_local timeline list` CLI output.
"""
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(["timeline", "list", "--tenant-id", str(tenant_id)])
timelines_cli = sorted(
map(
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
)
)
return timelines_cli
def init(
self,
init_config: Dict[str, Any],
force: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
init_config_tmpfile.write(toml.dumps(init_config))
init_config_tmpfile.flush()
cmd = [
"init",
f"--config={init_config_tmpfile.name}",
]
if force is not None:
cmd.extend(["--force", force])
res = self.raw_cli(cmd)
res.check_returncode()
return res
def storage_controller_start(
self,
timeout_in_seconds: Optional[int] = None,
instance_id: Optional[int] = None,
base_port: Optional[int] = None,
):
cmd = ["storage_controller", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
if base_port is not None:
cmd.append(f"--base-port={base_port}")
return self.raw_cli(cmd)
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
cmd = ["storage_controller", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
return self.raw_cli(cmd)
def pageserver_start(
self,
id: int,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}"]
if timeout_in_seconds is not None:
start_args.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop", f"--id={id}"]
if immediate:
cmd.extend(["-m", "immediate"])
log.info(f"Stopping pageserver with {cmd}")
return self.raw_cli(cmd)
def safekeeper_start(
self,
id: int,
extra_opts: Optional[List[str]] = None,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
if extra_opts is not None:
extra_opts = [f"-e={opt}" for opt in extra_opts]
else:
extra_opts = []
if timeout_in_seconds is not None:
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=extra_env_vars
)
def safekeeper_stop(
self, id: Optional[int] = None, immediate=False
) -> "subprocess.CompletedProcess[str]":
args = ["safekeeper", "stop"]
if id is not None:
args.append(str(id))
if immediate:
args.extend(["-m", "immediate"])
return self.raw_cli(args)
def storage_broker_start(
self, timeout_in_seconds: Optional[int] = None
) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(cmd)
def storage_broker_stop(self) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "stop"]
return self.raw_cli(cmd)
def endpoint_create(
self,
branch_name: str,
pg_port: int,
http_port: int,
tenant_id: TenantId,
pg_version: PgVersion,
endpoint_id: Optional[str] = None,
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"create",
"--tenant-id",
str(tenant_id),
"--branch-name",
branch_name,
"--pg-version",
pg_version,
]
if lsn is not None:
args.extend(["--lsn", str(lsn)])
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:
args.extend(["--hot-standby", "true"])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args)
res.check_returncode()
return res
def endpoint_start(
self,
endpoint_id: str,
safekeepers: Optional[List[int]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
basebackup_request_tries: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"start",
]
extra_env_vars = {}
if basebackup_request_tries is not None:
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
if remote_ext_config is not None:
args.extend(["--remote-ext-config", remote_ext_config])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
if endpoint_id is not None:
args.append(endpoint_id)
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()
return res
def endpoint_reconfigure(
self,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
pageserver_id: Optional[int] = None,
safekeepers: Optional[List[int]] = None,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = ["endpoint", "reconfigure", endpoint_id]
if tenant_id is not None:
args.extend(["--tenant-id", str(tenant_id)])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
return self.raw_cli(args, check_return_code=check_return_code)
def endpoint_stop(
self,
endpoint_id: str,
destroy=False,
check_return_code=True,
mode: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"stop",
]
if destroy:
args.append("--destroy")
if mode is not None:
args.append(f"--mode={mode}")
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
def mappings_map_branch(
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
) -> "subprocess.CompletedProcess[str]":
"""
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
Usually needed when creating branches via PageserverHttpClient and not neon_local.
After creating a name mapping, you can use EndpointFactory.create_start
with this registered branch name.
"""
args = [
"mappings",
"map",
"--branch-name",
name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
]
return self.raw_cli(args, check_return_code=True)
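# Illustrative usage, not part of this file: after registering a mapping, the branch
# name can be used like any neon_local-created branch (`env` is assumed to be a NeonEnv
# test fixture):
#
#   env.neon_cli.mappings_map_branch("imported", tenant_id, timeline_id)
#   env.endpoints.create_start("imported", tenant_id=tenant_id)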
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["start"], check_return_code=check_return_code)
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["stop"], check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
"""
A typed wrapper around the `wal_craft` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
COMMAND = "wal_craft"
def postgres_config(self) -> List[str]:
res = self.raw_cli(["print-postgres-config"])
res.check_returncode()
return res.stdout.split("\n")
def in_existing(self, type: str, connection: str) -> None:
res = self.raw_cli(["in-existing", type, connection])
res.check_returncode()
class Pagectl(AbstractNeonCli):
"""
A typed wrapper around the `pagectl` utility CLI tool.
"""
COMMAND = "pagectl"
def dump_index_part(self, path: Path) -> IndexPartDump:
res = self.raw_cli(["index-part", "dump", str(path)])
res.check_returncode()
parsed = json.loads(res.stdout)
return IndexPartDump.from_json(parsed)

View File

@@ -9,8 +9,6 @@ import os
import re
import shutil
import subprocess
import tempfile
import textwrap
import threading
import time
import uuid
@@ -21,7 +19,6 @@ from datetime import datetime
from enum import Enum
from fcntl import LOCK_EX, LOCK_UN, flock
from functools import cached_property
from itertools import chain, product
from pathlib import Path
from types import TracebackType
from typing import (
@@ -64,11 +61,12 @@ from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, Timeline
from fixtures.endpoint.http import EndpointHttpClient
from fixtures.log_helper import log
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
from fixtures.neon_cli import NeonLocalCli, Pagectl
from fixtures.pageserver.allowed_errors import (
DEFAULT_PAGESERVER_ALLOWED_ERRORS,
DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS,
)
from fixtures.pageserver.common_types import IndexPartDump, LayerName, parse_layer_file_name
from fixtures.pageserver.common_types import LayerName, parse_layer_file_name
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
@@ -491,7 +489,7 @@ class NeonEnvBuilder:
log.debug(
f"Services started, creating initial tenant {env.initial_tenant} and its initial timeline"
)
initial_tenant, initial_timeline = env.neon_cli.create_tenant(
initial_tenant, initial_timeline = env.create_tenant(
tenant_id=env.initial_tenant,
conf=initial_tenant_conf,
timeline_id=env.initial_timeline,
@@ -952,10 +950,16 @@ class NeonEnv:
initial_tenant - tenant ID of the initial tenant created in the repository
neon_cli - can be used to run the 'neon' CLI tool
neon_cli - can be used to run the 'neon_local' CLI tool
create_tenant() - initializes a new tenant in the page server, returns
the tenant id
create_tenant() - initializes a new tenant and an initial empty timeline on it,
returns the tenant and timeline id
create_branch() - branch a new timeline from an existing one, returns
the new timeline id
create_timeline() - initializes a new timeline by running initdb, returns
the new timeline id
"""
BASE_PAGESERVER_ID = 1
@@ -966,8 +970,6 @@ class NeonEnv:
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.neon_cli = NeonCli(env=self)
self.pagectl = Pagectl(env=self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.pageservers: List[NeonPageserver] = []
@@ -987,6 +989,21 @@ class NeonEnv:
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
neon_local_env_vars = {}
if self.rust_log_override is not None:
neon_local_env_vars["RUST_LOG"] = self.rust_log_override
self.neon_cli = NeonLocalCli(
extra_env=neon_local_env_vars,
binpath=self.neon_local_binpath,
repo_dir=self.repo_dir,
pg_distrib_dir=self.pg_distrib_dir,
)
pagectl_env_vars = {}
if self.rust_log_override is not None:
pagectl_env_vars["RUST_LOG"] = self.rust_log_override
self.pagectl = Pagectl(extra_env=pagectl_env_vars, binpath=self.neon_binpath)
# The URL for the pageserver to use as its control_plane_api config
if config.storage_controller_port_override is not None:
log.info(
@@ -1310,6 +1327,74 @@ class NeonEnv:
self.endpoint_counter += 1
return "ep-" + str(self.endpoint_counter)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
tenant_id = tenant_id or TenantId.generate()
timeline_id = timeline_id or TimelineId.generate()
self.neon_cli.tenant_create(
tenant_id=tenant_id,
timeline_id=timeline_id,
pg_version=self.pg_version,
conf=conf,
shard_count=shard_count,
shard_stripe_size=shard_stripe_size,
placement_policy=placement_policy,
set_default=set_default,
aux_file_policy=aux_file_policy,
)
return tenant_id, timeline_id
def config_tenant(self, tenant_id: Optional[TenantId], conf: Dict[str, str]):
"""
Update tenant config.
"""
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.tenant_config(tenant_id, conf)
def create_branch(
self,
new_branch_name: str = DEFAULT_BRANCH_NAME,
tenant_id: Optional[TenantId] = None,
ancestor_branch_name: Optional[str] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
new_timeline_id = new_timeline_id or TimelineId.generate()
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.timeline_branch(
tenant_id, new_timeline_id, new_branch_name, ancestor_branch_name, ancestor_start_lsn
)
return new_timeline_id
def create_timeline(
self,
new_branch_name: str,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
timeline_id = timeline_id or TimelineId.generate()
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.timeline_create(new_branch_name, tenant_id, timeline_id, self.pg_version)
return timeline_id
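# Illustrative usage, not part of this diff: with the helpers above, tests create
# tenants and branches without calling neon_cli directly, for example:
#
#   tenant_id, timeline_id = env.create_tenant(conf={"gc_period": "0s"})
#   child_timeline_id = env.create_branch(
#       "child", ancestor_branch_name="main", tenant_id=tenant_id
#   )
#   endpoint = env.endpoints.create_start("child", tenant_id=tenant_id)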
@pytest.fixture(scope="function")
def neon_simple_env(
@@ -1425,597 +1510,6 @@ class PageserverPort:
http: int
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
Supports a way to run arbitrary command directly via CLI.
Do not use directly, use specific subclasses instead.
"""
def __init__(self, env: NeonEnv):
self.env = env
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
def raw_cli(
self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True,
timeout=None,
local_binpath=False,
) -> "subprocess.CompletedProcess[str]":
"""
Run the command with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
If `check_return_code`, on non-zero exit code logs failure and raises.
If `local_binpath` is true, then we are invoking a test utility
"""
assert isinstance(arguments, list)
assert isinstance(self.COMMAND, str)
if local_binpath:
# Test utility
bin_neon = str(self.env.neon_local_binpath / self.COMMAND)
else:
# Normal binary
bin_neon = str(self.env.neon_binpath / self.COMMAND)
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(" ".join(args)))
env_vars = os.environ.copy()
env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir)
env_vars["POSTGRES_DISTRIB_DIR"] = str(self.env.pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars["RUST_LOG"] = self.env.rust_log_override
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass coverage settings
var = "LLVM_PROFILE_FILE"
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(
args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=timeout,
)
except subprocess.TimeoutExpired as e:
if e.stderr:
stderr = e.stderr.decode(errors="replace")
else:
stderr = ""
if e.stdout:
stdout = e.stdout.decode(errors="replace")
else:
stdout = ""
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
raise
indent = " "
if not res.returncode:
stripped = res.stdout.strip()
lines = stripped.splitlines()
if len(lines) < 2:
log.debug(f"Run {res.args} success: {stripped}")
else:
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
elif check_return_code:
# this way command output will be recorded and shown in the CI failure message
indent = indent * 2
msg = textwrap.dedent(
"""\
Run %s failed:
stdout:
%s
stderr:
%s
"""
)
msg = msg % (
res.args,
textwrap.indent(res.stdout.strip(), indent),
textwrap.indent(res.stderr.strip(), indent),
)
log.info(msg)
raise RuntimeError(msg) from subprocess.CalledProcessError(
res.returncode, res.args, res.stdout, res.stderr
)
return res
class NeonCli(AbstractNeonCli):
"""
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
COMMAND = "neon_local"
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
kwargs["local_binpath"] = True
return super().raw_cli(*args, **kwargs)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
tenant_id = tenant_id or TenantId.generate()
timeline_id = timeline_id or TimelineId.generate()
args = [
"tenant",
"create",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_policy is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
elif aux_file_policy is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
elif aux_file_policy is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
if set_default:
args.append("--set-default")
if shard_count is not None:
args.extend(["--shard-count", str(shard_count)])
if shard_stripe_size is not None:
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
if placement_policy is not None:
args.extend(["--placement-policy", str(placement_policy)])
res = self.raw_cli(args)
res.check_returncode()
return tenant_id, timeline_id
def import_tenant(self, tenant_id: TenantId):
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
res = self.raw_cli(args)
res.check_returncode()
def set_default(self, tenant_id: TenantId):
"""
Update default tenant for future operations that require tenant_id.
"""
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
res.check_returncode()
def config_tenant(self, tenant_id: TenantId, conf: Dict[str, str]):
"""
Update tenant config.
"""
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
res = self.raw_cli(args)
res.check_returncode()
def list_tenants(self) -> "subprocess.CompletedProcess[str]":
res = self.raw_cli(["tenant", "list"])
res.check_returncode()
return res
def create_timeline(
self,
new_branch_name: str,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
"--branch-name",
new_branch_name,
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
def create_branch(
self,
new_branch_name: str = DEFAULT_BRANCH_NAME,
ancestor_branch_name: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if new_timeline_id is None:
new_timeline_id = TimelineId.generate()
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(new_timeline_id),
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
if ancestor_branch_name is not None:
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
if ancestor_start_lsn is not None:
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
res = self.raw_cli(cmd)
res.check_returncode()
return TimelineId(str(new_timeline_id))
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
)
timelines_cli = sorted(
map(
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
)
)
return timelines_cli
def init(
self,
init_config: Dict[str, Any],
force: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
init_config_tmpfile.write(toml.dumps(init_config))
init_config_tmpfile.flush()
cmd = [
"init",
f"--config={init_config_tmpfile.name}",
]
if force is not None:
cmd.extend(["--force", force])
res = self.raw_cli(cmd)
res.check_returncode()
return res
def storage_controller_start(
self,
timeout_in_seconds: Optional[int] = None,
instance_id: Optional[int] = None,
base_port: Optional[int] = None,
):
cmd = ["storage_controller", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
if base_port is not None:
cmd.append(f"--base-port={base_port}")
return self.raw_cli(cmd)
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
cmd = ["storage_controller", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
return self.raw_cli(cmd)
def pageserver_start(
self,
id: int,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}"]
if timeout_in_seconds is not None:
start_args.append(f"--start-timeout={timeout_in_seconds}s")
storage = self.env.pageserver_remote_storage
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop", f"--id={id}"]
if immediate:
cmd.extend(["-m", "immediate"])
log.info(f"Stopping pageserver with {cmd}")
return self.raw_cli(cmd)
def safekeeper_start(
self,
id: int,
extra_opts: Optional[List[str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
if extra_opts is not None:
extra_opts = [f"-e={opt}" for opt in extra_opts]
else:
extra_opts = []
if timeout_in_seconds is not None:
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=s3_env_vars
)
def safekeeper_stop(
self, id: Optional[int] = None, immediate=False
) -> "subprocess.CompletedProcess[str]":
args = ["safekeeper", "stop"]
if id is not None:
args.append(str(id))
if immediate:
args.extend(["-m", "immediate"])
return self.raw_cli(args)
def broker_start(
self, timeout_in_seconds: Optional[int] = None
) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(cmd)
def broker_stop(self) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "stop"]
return self.raw_cli(cmd)
def endpoint_create(
self,
branch_name: str,
pg_port: int,
http_port: int,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"create",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--branch-name",
branch_name,
"--pg-version",
self.env.pg_version,
]
if lsn is not None:
args.extend(["--lsn", str(lsn)])
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:
args.extend(["--hot-standby", "true"])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args)
res.check_returncode()
return res
def endpoint_start(
self,
endpoint_id: str,
safekeepers: Optional[List[int]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
basebackup_request_tries: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"start",
]
extra_env_vars = {}
if basebackup_request_tries is not None:
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
if remote_ext_config is not None:
args.extend(["--remote-ext-config", remote_ext_config])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
if endpoint_id is not None:
args.append(endpoint_id)
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()
return res
def endpoint_reconfigure(
self,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
pageserver_id: Optional[int] = None,
safekeepers: Optional[List[int]] = None,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = ["endpoint", "reconfigure", endpoint_id]
if tenant_id is not None:
args.extend(["--tenant-id", str(tenant_id)])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
return self.raw_cli(args, check_return_code=check_return_code)
def endpoint_stop(
self,
endpoint_id: str,
destroy=False,
check_return_code=True,
mode: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"stop",
]
if destroy:
args.append("--destroy")
if mode is not None:
args.append(f"--mode={mode}")
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
def map_branch(
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
) -> "subprocess.CompletedProcess[str]":
"""
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
Usually needed when creating branches via PageserverHttpClient and not neon_local.
After creating a name mapping, you can use EndpointFactory.create_start
with this registered branch name.
"""
args = [
"mappings",
"map",
"--branch-name",
name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
]
return self.raw_cli(args, check_return_code=True)
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["start"], check_return_code=check_return_code)
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["stop"], check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
"""
A typed wrapper around the `wal_craft` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
COMMAND = "wal_craft"
def postgres_config(self) -> List[str]:
res = self.raw_cli(["print-postgres-config"])
res.check_returncode()
return res.stdout.split("\n")
def in_existing(self, type: str, connection: str) -> None:
res = self.raw_cli(["in-existing", type, connection])
res.check_returncode()
class ComputeCtl(AbstractNeonCli):
"""
A typed wrapper around the `compute_ctl` CLI tool.
"""
COMMAND = "compute_ctl"
class Pagectl(AbstractNeonCli):
"""
A typed wrapper around the `pagectl` utility CLI tool.
"""
COMMAND = "pagectl"
def dump_index_part(self, path: Path) -> IndexPartDump:
res = self.raw_cli(["index-part", "dump", str(path)])
res.check_returncode()
parsed = json.loads(res.stdout)
return IndexPartDump.from_json(parsed)
class LogUtils:
"""
A mixin class which provides utilities for inspecting the logs of a service.
@@ -2933,6 +2427,10 @@ class NeonPageserver(PgProtocol, LogUtils):
"""
assert self.running is False
storage = self.env.pageserver_remote_storage
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
self.env.neon_cli.pageserver_start(
self.id, extra_env_vars=extra_env_vars, timeout_in_seconds=timeout_in_seconds
)
@@ -3953,6 +3451,7 @@ class Endpoint(PgProtocol, LogUtils):
hot_standby=hot_standby,
pg_port=self.pg_port,
http_port=self.http_port,
pg_version=self.env.pg_version,
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
)
@@ -4395,8 +3894,16 @@ class Safekeeper(LogUtils):
extra_opts = self.extra_opts
assert self.running is False
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
self.env.neon_cli.safekeeper_start(
self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds
self.id,
extra_opts=extra_opts,
timeout_in_seconds=timeout_in_seconds,
extra_env_vars=s3_env_vars,
)
self.running = True
# wait for wal acceptor start by checking its status
@@ -4542,7 +4049,7 @@ class Safekeeper(LogUtils):
1) wait for remote_consistent_lsn and wal_backup_lsn on safekeeper to reach it.
2) checkpoint timeline on safekeeper, which should remove WAL before this LSN; optionally wait for that.
"""
cli = self.http_client()
client = self.http_client()
target_segment_file = lsn.segment_name()
@@ -4554,7 +4061,7 @@ class Safekeeper(LogUtils):
assert all(target_segment_file <= s for s in segments)
def are_lsns_advanced():
stat = cli.timeline_status(tenant_id, timeline_id)
stat = client.timeline_status(tenant_id, timeline_id)
log.info(
f"waiting for remote_consistent_lsn and backup_lsn on sk {self.id} to reach {lsn}, currently remote_consistent_lsn={stat.remote_consistent_lsn}, backup_lsn={stat.backup_lsn}"
)
@@ -4563,7 +4070,7 @@ class Safekeeper(LogUtils):
# xxx: max wait is long because we might be waiting for reconnection from
# pageserver to this safekeeper
wait_until(30, 1, are_lsns_advanced)
cli.checkpoint(tenant_id, timeline_id)
client.checkpoint(tenant_id, timeline_id)
if wait_wal_removal:
wait_until(30, 1, are_segments_removed)
@@ -4591,13 +4098,13 @@ class NeonBroker(LogUtils):
timeout_in_seconds: Optional[int] = None,
):
assert not self.running
self.env.neon_cli.broker_start(timeout_in_seconds)
self.env.neon_cli.storage_broker_start(timeout_in_seconds)
self.running = True
return self
def stop(self):
if self.running:
self.env.neon_cli.broker_stop()
self.env.neon_cli.storage_broker_stop()
self.running = False
return self
@@ -5226,10 +4733,10 @@ def flush_ep_to_pageserver(
commit_lsn: Lsn = Lsn(0)
# In principle, in the absence of failures, polling a single sk would be enough.
for sk in env.safekeepers:
cli = sk.http_client()
client = sk.http_client()
# wait until compute connections are gone
wait_walreceivers_absent(cli, tenant, timeline)
commit_lsn = max(cli.get_commit_lsn(tenant, timeline), commit_lsn)
wait_walreceivers_absent(client, tenant, timeline)
commit_lsn = max(client.get_commit_lsn(tenant, timeline), commit_lsn)
# Note: depending on WAL filtering implementation, probably most shards
# won't be able to reach commit_lsn (unless gaps are also ack'ed), so this
@@ -5282,7 +4789,12 @@ def fork_at_current_lsn(
the WAL up to that LSN to arrive in the pageserver before creating the branch.
"""
current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
return env.create_branch(
new_branch_name=new_branch_name,
tenant_id=tenant_id,
ancestor_branch_name=ancestor_branch_name,
ancestor_start_lsn=current_lsn,
)
def import_timeline_from_vanilla_postgres(
@@ -5301,9 +4813,9 @@ def import_timeline_from_vanilla_postgres(
"""
# Take backup of the existing PostgreSQL server with pg_basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
basebackup_dir = test_output_dir / "basebackup"
base_tar = basebackup_dir / "base.tar"
wal_tar = basebackup_dir / "pg_wal.tar"
os.mkdir(basebackup_dir)
pg_bin.run(
[
@@ -5313,40 +4825,28 @@ def import_timeline_from_vanilla_postgres(
"-d",
vanilla_pg_connstr,
"-D",
basebackup_dir,
str(basebackup_dir),
]
)
# Extract start_lsn and end_lsn from the backup manifest file
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
start_lsn = Lsn(manifest["WAL-Ranges"][0]["Start-LSN"])
end_lsn = Lsn(manifest["WAL-Ranges"][0]["End-LSN"])
# Import the backup tarballs into the pageserver
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base_tar,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal_tar,
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant_id,
timeline_id=timeline_id,
new_branch_name=branch_name,
base_lsn=start_lsn,
base_tarfile=base_tar,
end_lsn=end_lsn,
wal_tarfile=wal_tar,
pg_version=env.pg_version,
)
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, Lsn(end_lsn))
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, end_lsn)
def last_flush_lsn_upload(

View File

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any, List, Tuple
from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv, Pagectl
from fixtures.neon_fixtures import NeonEnv
from fixtures.pageserver.common_types import (
InvalidFileName,
parse_layer_file_name,
@@ -35,7 +35,7 @@ def duplicate_one_tenant(env: NeonEnv, template_tenant: TenantId, new_tenant: Te
for file in tl.iterdir():
shutil.copy2(file, dst_tl_dir)
if "__" in file.name:
Pagectl(env).raw_cli(
env.pagectl.raw_cli(
[
"layer",
"rewrite-summary",

View File

@@ -53,7 +53,7 @@ def setup_env(
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ep = env.endpoints.create_start("main", tenant_id=template_tenant)

View File

@@ -81,7 +81,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -162,7 +162,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ps_http = env.pageserver.http_client()

View File

@@ -41,7 +41,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
pg_bin = neon_compare.pg_bin
# Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{4 * 1024 ** 2}",
@@ -64,7 +64,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
endpoint.stop()
env.neon_cli.create_branch("b0", tenant_id=tenant)
env.create_branch("b0", tenant_id=tenant)
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True))
@@ -78,7 +78,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch(f"b{i + 1}", f"b{p}", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{p}", tenant_id=tenant)
dur = timeit.default_timer() - timer
log.info(f"Creating branch b{i+1} took {dur}s")
@@ -104,7 +104,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
# seed the prng so we will measure the same structure every time
rng = random.Random("2024-02-29")
env.neon_cli.create_branch("b0")
env.create_branch("b0")
endpoint = env.endpoints.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
@@ -121,7 +121,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
timer = timeit.default_timer()
# each of these uploads to remote storage before completion
env.neon_cli.create_branch(f"b{i + 1}", parent)
env.create_branch(f"b{i + 1}", ancestor_branch_name=parent)
dur = timeit.default_timer() - timer
branch_creation_durations.append(dur)
@@ -222,7 +222,7 @@ def wait_and_record_startup_metrics(
def test_branch_creation_many_relations(neon_compare: NeonCompare):
env = neon_compare.env
timeline_id = env.neon_cli.create_branch("root")
timeline_id = env.create_branch("root")
endpoint = env.endpoints.create_start("root")
with closing(endpoint.connect()) as conn:
@@ -238,7 +238,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
)
with neon_compare.record_duration("create_branch_time_not_busy_root"):
env.neon_cli.create_branch("child_not_busy", "root")
env.create_branch("child_not_busy", ancestor_branch_name="root")
# run a concurrent insertion to make the ancestor "busy" during the branch creation
thread = threading.Thread(
@@ -247,6 +247,6 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
thread.start()
with neon_compare.record_duration("create_branch_time_busy_root"):
env.neon_cli.create_branch("child_busy", "root")
env.create_branch("child_busy", ancestor_branch_name="root")
thread.join()

View File

@@ -41,7 +41,7 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
)
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", endpoint_root.connstr(), "-s10"])
@@ -55,14 +55,14 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
)
env.neon_cli.create_branch("child", "root")
env.create_branch("child", ancestor_branch_name="root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):
@@ -73,7 +73,7 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql_many(
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
]
)
env.neon_cli.create_branch("child", "root")
env.create_branch("child", ancestor_branch_name="root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):

View File

@@ -26,10 +26,8 @@ def test_bulk_tenant_create(
for i in range(tenants_count):
start = timeit.default_timer()
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
tenant, _ = env.create_tenant()
env.create_timeline(f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant)
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
# if use_safekeepers == 'with_sa':

View File

@@ -16,7 +16,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
env = neon_env_builder.init_start()
n_records = 1000000
timeline_id = env.neon_cli.create_branch("test_bulk_update")
timeline_id = env.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
endpoint = env.endpoints.create_start("test_bulk_update")
cur = endpoint.connect().cursor()

View File

@@ -17,7 +17,7 @@ def test_compaction(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# Disable background GC and compaction, we'll run compaction manually.
"gc_period": "0s",
@@ -68,7 +68,7 @@ def test_compaction_l0_memory(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# Initially disable compaction so that we will build up a stack of L0s
"compaction_period": "0s",

View File

@@ -11,7 +11,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
tenant_id, _ = env.neon_cli.create_tenant(
tenant_id, _ = env.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -63,7 +63,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
log.info(f"Physical storage size {physical_size}")
if mode == "with_snapshots":
if step == n_steps / 2:
env.neon_cli.create_branch("child")
env.create_branch("child")
max_num_of_deltas_above_image = 0
max_total_num_of_deltas = 0

View File

@@ -15,7 +15,7 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
# We want to have a lot of layer files to exercise the layer map. Disable
# GC, and make checkpoint_distance very small, so that we get a lot of small layer
# files.
tenant, timeline = env.neon_cli.create_tenant(
tenant, timeline = env.create_tenant(
conf={
"gc_period": "0s",
"checkpoint_distance": "16384",

View File

@@ -33,7 +33,7 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark:
env = neon_env_builder.init_start()
lazy_slru_download = "true" if slru == "lazy" else "false"
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"lazy_slru_download": lazy_slru_download,
}

View File

@@ -85,7 +85,7 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
tenants = {}
for tenant_id in set(TenantId.generate() for _i in range(0, tenant_count)):
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
env.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
endpoint = env.endpoints.create("main", tenant_id=tenant_id)
tenants[tenant_id] = TenantState(timeline_id, endpoint)
endpoint.start()

View File

@@ -27,7 +27,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_startup")
env.create_branch("test_startup")
endpoint = None

View File

@@ -12,7 +12,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"checkpoint_distance": "4194304",
"gc_period": "0s",
@@ -45,7 +45,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 100k rows: {lsn_100}")
# Create branch1.
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
env.create_branch(
"branch1", ancestor_branch_name="main", ancestor_start_lsn=lsn_100, tenant_id=tenant
)
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
branch1_cur = endpoint_branch1.connect().cursor()
@@ -67,7 +69,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200k rows: {lsn_200}")
# Create branch2.
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
env.create_branch(
"branch2", ancestor_branch_name="branch1", ancestor_start_lsn=lsn_200, tenant_id=tenant
)
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
branch2_cur = endpoint_branch2.connect().cursor()

View File

@@ -41,7 +41,7 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -109,7 +109,7 @@ def test_empty_config(positive_env: NeonEnv, content_type: Optional[str]):
"""
env = positive_env
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -182,7 +182,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
fully_custom_config.keys()
), "ensure we cover all config options"
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
ps_http.set_tenant_config(tenant_id, fully_custom_config)
our_tenant_config = ps_http.tenant_config(tenant_id)
assert our_tenant_config.tenant_specific_overrides == fully_custom_config

View File

@@ -76,7 +76,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
branch = "test_compute_auth_to_pageserver"
env.neon_cli.create_branch(branch)
env.create_branch(branch)
endpoint = env.endpoints.create_start(branch)
with closing(endpoint.connect()) as conn:
@@ -186,7 +186,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
timeline_id = env.neon_cli.create_branch(branch)
timeline_id = env.create_branch(branch)
env.endpoints.create_start(branch)
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)

View File

@@ -98,7 +98,7 @@ def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.neon_cli.create_branch("test_backpressure")
env.create_branch("test_backpressure")
endpoint = env.endpoints.create(
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]


@@ -22,7 +22,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
pageserver_http.configure_failpoints(("simulated-bad-compute-connection", "50%return(15)"))
env.neon_cli.create_branch("test_compute_pageserver_connection_stress")
env.create_branch("test_compute_pageserver_connection_stress")
endpoint = env.endpoints.create_start("test_compute_pageserver_connection_stress")
pg_conn = endpoint.connect()


@@ -53,7 +53,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
tenant, timeline_main = env.neon_cli.create_tenant(
tenant, timeline_main = env.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -90,7 +90,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
pageserver_http_client.timeline_checkpoint(tenant, timeline_main)
pageserver_http_client.timeline_gc(tenant, timeline_main, lsn2 - lsn1 + 1024)
env.neon_cli.create_branch(
env.create_branch(
"test_branch", ancestor_branch_name="main", ancestor_start_lsn=lsn1, tenant_id=tenant
)
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
@@ -127,7 +127,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env.storage_controller.allowed_errors.extend(error_regexes)
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -145,7 +145,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
}
)
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
b0 = env.create_branch("b0", tenant_id=tenant)
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
res = endpoint0.safe_psql_many(
queries=[
@@ -176,7 +176,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=lsn, tenant_id=tenant)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()


@@ -23,7 +23,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
env.storage_controller.allowed_errors.extend(error_regexes)
# Branch at the point where only 100 rows were inserted
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
branch_behind_timeline_id = env.create_branch("test_branch_behind")
endpoint_main = env.endpoints.create_start("test_branch_behind")
main_cur = endpoint_main.connect().cursor()
@@ -58,8 +58,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200100 rows: {lsn_b}")
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch(
"test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a
env.create_branch(
"test_branch_behind_hundred",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_a,
)
# Insert many more rows. This generates enough WAL to fill a few segments.
@@ -75,8 +77,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 400100 rows: {lsn_c}")
# Branch at the point where only 200100 rows were inserted
env.neon_cli.create_branch(
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
env.create_branch(
"test_branch_behind_more",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_b,
)
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
@@ -97,15 +101,17 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# branch at segment boundary
env.neon_cli.create_branch(
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
env.create_branch(
"test_branch_segment_boundary",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/3000000"),
)
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
# branch at pre-initdb lsn (from main branch)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
env.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()
@@ -116,8 +122,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.neon_cli.create_branch(
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42")
env.create_branch(
"test_branch_preinitdb",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/42"),
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
@@ -139,8 +147,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
print_gc_result(gc_result)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.neon_cli.create_branch(
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
env.create_branch(
"test_branch_create_fail",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=gced_lsn,
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):


@@ -38,7 +38,7 @@ def test_branching_with_pgbench(
env = neon_simple_env
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{1024 ** 2}",
@@ -55,7 +55,7 @@ def test_branching_with_pgbench(
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", "-T15", connstr])
env.neon_cli.create_branch("b0", tenant_id=tenant)
env.create_branch("b0", tenant_id=tenant)
endpoints: List[Endpoint] = []
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
@@ -84,9 +84,9 @@ def test_branching_with_pgbench(
threads = []
if ty == "cascade":
env.neon_cli.create_branch(f"b{i + 1}", f"b{i}", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{i}", tenant_id=tenant)
else:
env.neon_cli.create_branch(f"b{i + 1}", "b0", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name="b0", tenant_id=tenant)
endpoints.append(env.endpoints.create_start(f"b{i + 1}", tenant_id=tenant))
@@ -120,7 +120,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
env = neon_simple_env
env.neon_cli.create_branch("b0")
env.create_branch("b0")
endpoint0 = env.endpoints.create_start("b0")
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
@@ -133,7 +133,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=start_lsn)
endpoint1 = env.endpoints.create_start("b1")
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
@@ -173,7 +173,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
wait_until_paused(env, "before-upload-index-pausable")
env.neon_cli.map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"):
env.endpoints.create_start(
@@ -432,9 +432,7 @@ def test_branching_while_stuck_find_gc_cutoffs(neon_env_builder: NeonEnvBuilder)
wait_until_paused(env, failpoint)
env.neon_cli.create_branch(
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch"
)
env.create_branch("branch", ancestor_branch_name="main")
client.configure_failpoints((failpoint, "off"))


@@ -34,7 +34,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
for _ in range(3):
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
@@ -84,13 +84,11 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
)
executor.submit(env.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id)
for i in range(4)
]
for future in futures:
@@ -111,7 +109,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
@@ -123,7 +121,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -151,11 +149,11 @@ def test_timeline_init_break_before_checkpoint_recreate(
]
)
env.neon_cli.create_tenant(env.initial_tenant)
env.create_tenant(env.initial_tenant)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Some fixed timeline ID (like control plane does)
@@ -176,7 +174,7 @@ def test_timeline_init_break_before_checkpoint_recreate(
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -201,7 +199,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint when creating a new timeline, right after creating its directory
@@ -211,7 +209,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
# "New" timeline is not present in the list, allowing pageserver to retry the same request
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"


@@ -34,7 +34,7 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver):
ignore_notify
)
env.neon_cli.create_branch("test_change_pageserver")
env.create_branch("test_change_pageserver")
endpoint = env.endpoints.create_start("test_change_pageserver")
# Put this tenant into a dual-attached state


@@ -56,8 +56,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.neon_cli.create_branch(
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
env.create_branch(
"test_clog_truncate_new",
ancestor_branch_name="main",
ancestor_start_lsn=lsn_after_truncation,
)
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")


@@ -23,7 +23,7 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
env = neon_simple_env
def start_workload():
env.neon_cli.create_branch("test_lsof_pageserver_pid")
env.create_branch("test_lsof_pageserver_pid")
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:

View File

@@ -517,7 +517,7 @@ def test_historic_storage_formats(
assert metadata_summary["tenant_count"] >= 1
assert metadata_summary["timeline_count"] >= 1
env.neon_cli.import_tenant(dataset.tenant_id)
env.neon_cli.tenant_import(dataset.tenant_id)
# Discover timelines
timelines = env.pageserver.http_client().timeline_list(dataset.tenant_id)


@@ -38,7 +38,7 @@ def test_safekeepers_reconfigure_reorder(
):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_reconfigure_reorder")
env.create_branch("test_safekeepers_reconfigure_reorder")
endpoint = env.endpoints.create_start("test_safekeepers_reconfigure_reorder")


@@ -1,6 +1,7 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
from fixtures.neon_cli import WalCraft
from fixtures.neon_fixtures import NeonEnvBuilder
# Restart nodes with WAL end having specially crafted shape, like last record
# crossing segment boundary, to test decoding issues.
@@ -18,7 +19,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
)
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_crafted_wal_end")
env.create_branch("test_crafted_wal_end")
env.pageserver.allowed_errors.extend(
[
# seems like pageserver stop triggers these
@@ -27,7 +28,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
)
endpoint = env.endpoints.create("test_crafted_wal_end")
wal_craft = WalCraft(env)
wal_craft = WalCraft(extra_env=None, binpath=env.neon_binpath)
endpoint.config(wal_craft.postgres_config())
endpoint.start()
res = endpoint.safe_psql_many(


@@ -31,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
env.create_branch("test_createdb2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
@@ -77,10 +77,14 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
env.create_branch(
"test_before_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_before_drop
)
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
env.create_branch(
"test_after_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_after_drop
)
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop


@@ -18,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
env.create_branch("test_createuser2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to new branch as a new user


@@ -59,11 +59,11 @@ def test_min_resident_size_override_handling(
env.pageserver.stop()
env.pageserver.start()
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
assert_overrides(tenant_id, config_level_override)
# Also ensure that specifying the parameter to create_tenant works, in addition to http-level reconfig.
tenant_id, _ = env.neon_cli.create_tenant(conf={"min_resident_size_override": "100"})
tenant_id, _ = env.create_tenant(conf={"min_resident_size_override": "100"})
assert_config(tenant_id, 100, 100)
ps_http.set_tenant_config(tenant_id, {})
assert_config(tenant_id, None, config_level_override)
@@ -280,7 +280,7 @@ def _eviction_env(
def pgbench_init_tenant(
layer_size: int, scale: int, env: NeonEnv, pg_bin: PgBin
) -> Tuple[TenantId, TimelineId]:
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
"gc_period": "0s",
"compaction_period": "0s",


@@ -81,7 +81,7 @@ def test_remote_extensions(
# Start a compute node with remote_extension spec
# and check that it can download the extensions and use them to CREATE EXTENSION.
env = neon_env_builder_local.init_start()
env.neon_cli.create_branch("test_remote_extensions")
env.create_branch("test_remote_extensions")
endpoint = env.endpoints.create(
"test_remote_extensions",
config_lines=["log_min_messages=debug3"],


@@ -15,7 +15,7 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
Test that triggering crash from neon_test_utils crashes the endpoint
"""
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_endpoint_crash")
env.create_branch("test_endpoint_crash")
endpoint = env.endpoints.create_start("test_endpoint_crash")
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")


@@ -3,7 +3,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fsm_truncate")
env.create_branch("test_fsm_truncate")
endpoint = env.endpoints.create_start("test_fsm_truncate")
endpoint.safe_psql(
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"


@@ -68,7 +68,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
timeline = env.neon_cli.create_branch("test_gc_aggressive", "main")
timeline = env.create_branch("test_gc_aggressive", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_gc_aggressive")
with endpoint.cursor() as cur:
@@ -99,7 +99,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder):
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main")
timeline_id = env.create_branch("test_gc_index_upload", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_gc_index_upload")
pageserver_http = env.pageserver.http_client()


@@ -98,27 +98,15 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
)
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_tarfile=base,
base_lsn=start_lsn,
wal_tarfile=wal,
end_lsn=end_lsn,
pg_version=env.pg_version,
)
# Importing empty file fails
@@ -158,7 +146,7 @@ def test_import_from_pageserver_small(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
timeline = env.create_branch("test_import_from_pageserver_small")
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
num_rows = 3000
@@ -177,7 +165,7 @@ def test_import_from_pageserver_multisegment(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
timeline = env.create_branch("test_import_from_pageserver_multisegment")
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
@@ -268,23 +256,13 @@ def _import(
branch_name = "import_from_pageserver"
client = env.pageserver.http_client()
env.pageserver.tenant_create(tenant)
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
str(lsn),
"--base-tarfile",
str(tar_output_file),
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_lsn=lsn,
base_tarfile=tar_output_file,
pg_version=env.pg_version,
)
# Wait for data to land in s3


@@ -178,9 +178,9 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
def tenant_update_config(changes):
tenant_config.update(changes)
env.neon_cli.config_tenant(tenant_id, tenant_config)
env.config_tenant(tenant_id, tenant_config)
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
log.info("tenant id is %s", tenant_id)
env.initial_tenant = tenant_id # update_and_gc relies on this
ps_http = env.pageserver.http_client()


@@ -8,7 +8,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",
@@ -52,7 +52,7 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",


@@ -56,7 +56,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
}
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)


@@ -219,7 +219,7 @@ def test_ondemand_wal_download_in_replication_slot_funcs(neon_env_builder: NeonE
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -270,7 +270,7 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -352,7 +352,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
@@ -397,7 +397,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
cur = endpoint.connect().cursor()
@@ -445,7 +445,7 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
def test_slots_and_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, timeline = env.neon_cli.create_tenant()
tenant, timeline = env.create_tenant()
env.pageserver.http_client()
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
@@ -457,7 +457,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
# Create branch ws.
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
env.create_branch("ws", ancestor_branch_name="main", tenant_id=tenant)
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
# Check that we can create slot with the same name
@@ -469,10 +469,10 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without getting stuck when a replication job is active + neon extension installed
env = neon_simple_env
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
env.create_branch("test_replication_shutdown_publisher", ancestor_branch_name="main")
pub = env.endpoints.create("test_replication_shutdown_publisher")
env.neon_cli.create_branch("test_replication_shutdown_subscriber")
env.create_branch("test_replication_shutdown_subscriber")
sub = env.endpoints.create("test_replication_shutdown_subscriber")
pub.respec(skip_pg_catalog_updates=False)
@@ -575,7 +575,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
vanilla_pg.start()
vanilla_pg.safe_psql("create extension neon;")
env.neon_cli.create_branch("subscriber")
env.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()


@@ -32,7 +32,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
"""
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant(
tenant_id, _ = env.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -43,7 +43,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
}
)
timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = env.create_branch("test_lsn_mapping", tenant_id=tenant_id)
endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0]
@@ -123,8 +123,8 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
endpoint_here.stop_and_destroy()
# Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff
timeline_id_child = env.neon_cli.create_branch(
"test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping"
timeline_id_child = env.create_branch(
"test_lsn_mapping_child", ancestor_branch_name="test_lsn_mapping", tenant_id=tenant_id
)
# Timestamp is in the unreachable past
@@ -190,7 +190,7 @@ def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch("test_ts_of_lsn_api")
new_timeline_id = env.create_branch("test_ts_of_lsn_api")
endpoint_main = env.endpoints.create_start("test_ts_of_lsn_api")
cur = endpoint_main.connect().cursor()


@@ -72,9 +72,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.neon_cli.create_branch(
"test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
)
env.create_branch("test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint_new = env.endpoints.create_start("test_multixact_new")
next_multixact_id_new = endpoint_new.safe_psql(


@@ -31,7 +31,7 @@ def helper_compare_timeline_list(
)
)
timelines_cli = env.neon_cli.list_timelines(initial_tenant)
timelines_cli = env.neon_cli.timeline_list(initial_tenant)
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
assert timelines_api == cli_timeline_ids
@@ -44,17 +44,19 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main")
main_timeline_id = env.create_branch("test_cli_branch_list_main")
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.neon_cli.create_branch(
"test_cli_branch_list_nested", "test_cli_branch_list_main"
nested_timeline_id = env.create_branch(
"test_cli_branch_list_nested", ancestor_branch_name="test_cli_branch_list_main"
)
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
timelines_cli = [
timeline_id for (_, timeline_id) in env.neon_cli.timeline_list(env.initial_tenant)
]
assert main_timeline_id in timelines_cli
assert nested_timeline_id in timelines_cli
@@ -64,7 +66,7 @@ def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))
res = env.neon_cli.list_tenants()
res = env.neon_cli.tenant_list()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants_api == tenants_cli
@@ -77,18 +79,18 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant1, _ = env.neon_cli.create_tenant()
tenant1, _ = env.create_tenant()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant2, _ = env.neon_cli.create_tenant()
tenant2, _ = env.create_tenant()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
res = env.neon_cli.list_tenants()
res = env.neon_cli.tenant_list()
tenants = sorted(map(lambda t: TenantId(t.split()[0]), res.stdout.splitlines()))
assert env.initial_tenant in tenants
@@ -98,8 +100,8 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)
tenant_id, _ = env.create_tenant()
timelines = env.neon_cli.timeline_list(tenant_id)
# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
@@ -132,7 +134,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.storage_controller_stop(False)
env.neon_cli.broker_stop()
env.neon_cli.storage_broker_stop()
# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -175,7 +177,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
# Stop this to get out of the way of the following `start`
env.neon_cli.storage_controller_stop(False)
env.neon_cli.broker_stop()
env.neon_cli.storage_broker_stop()
# Default start
res = env.neon_cli.raw_cli(["start"])


@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
# Verify that the neon extension is installed and has the correct version.
def test_neon_extension(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_create_extension_neon")
env.create_branch("test_create_extension_neon")
endpoint_main = env.endpoints.create("test_create_extension_neon")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -35,7 +35,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be upgraded/downgraded.
def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_neon_extension_compatibility")
env.create_branch("test_neon_extension_compatibility")
endpoint_main = env.endpoints.create("test_neon_extension_compatibility")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -72,7 +72,7 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be auto-upgraded to the latest version.
def test_neon_extension_auto_upgrade(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_neon_extension_auto_upgrade")
env.create_branch("test_neon_extension_auto_upgrade")
endpoint_main = env.endpoints.create("test_neon_extension_auto_upgrade")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon


@@ -1,4 +1,5 @@
import pytest
from fixtures.common_types import TimelineId
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.port_distributor import PortDistributor
@@ -10,22 +11,36 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
# Skipping the init step that creates a local tenant in Pytest tests
try:
env.neon_cli.start()
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)
env.create_tenant(tenant_id=env.initial_tenant, set_default=True)
main_branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
main_branch_name, pg_port, http_port, endpoint_id="ep-basic-main"
main_branch_name,
pg_port,
http_port,
endpoint_id="ep-basic-main",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start("ep-basic-main")
branch_name = "migration-check"
env.neon_cli.create_branch(branch_name)
env.neon_cli.timeline_branch(
tenant_id=env.initial_tenant,
timeline_id=TimelineId.generate(),
new_branch_name=branch_name,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name, pg_port, http_port, endpoint_id=f"ep-{branch_name}"
branch_name,
pg_port,
http_port,
endpoint_id=f"ep-{branch_name}",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start(f"ep-{branch_name}")
finally:
@@ -43,12 +58,26 @@ def test_neon_two_primary_endpoints_fail(
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep1")
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep1",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
# ep1 is not running so create will succeed
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep2")
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep2",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start("ep1")


@@ -6,10 +6,10 @@ from fixtures.utils import wait_until
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
env.create_branch("test_neon_superuser_publisher", ancestor_branch_name="main")
pub = env.endpoints.create("test_neon_superuser_publisher")
env.neon_cli.create_branch("test_neon_superuser_subscriber")
env.create_branch("test_neon_superuser_subscriber")
sub = env.endpoints.create("test_neon_superuser_subscriber")
pub.respec(skip_pg_catalog_updates=False)


@@ -5,7 +5,7 @@ from fixtures.pageserver.http import PageserverHttpClient
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = endpoint.safe_psql_many(


@@ -17,7 +17,7 @@ from fixtures.utils import print_gc_result, query_scalar
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
env.neon_cli.create_branch("test_old_request_lsn", "main")
env.create_branch("test_old_request_lsn", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_old_request_lsn")
pg_conn = endpoint.connect()


@@ -545,7 +545,7 @@ def test_compaction_downloads_on_demand_without_image_creation(neon_env_builder:
layer_sizes += layer.layer_file_size
pageserver_http.evict_layer(tenant_id, timeline_id, layer.layer_file_name)
env.neon_cli.config_tenant(tenant_id, {"compaction_threshold": "3"})
env.config_tenant(tenant_id, {"compaction_threshold": "3"})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
@@ -647,7 +647,7 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne
# layers -- threshold of 2 would sound more reasonable, but keeping it as 1
# to be less flaky
conf["image_creation_threshold"] = "1"
env.neon_cli.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
env.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
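A small hypothetical sketch of the reconfigure-then-compact pattern above; the threshold value is an assumption, and the import paths follow the fixtures.common_types import shown later in this comparison:

from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv

def sketch_tenant_reconfig(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
    # config_tenant is now a NeonEnv method rather than a neon_cli one.
    env.config_tenant(tenant_id, {"compaction_threshold": "3"})
    env.pageserver.http_client().timeline_compact(tenant_id, timeline_id)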


@@ -59,7 +59,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
timeline_details = client.timeline_detail(
tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True
@@ -108,7 +108,7 @@ def expect_updated_msg_lsn(
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# insert something to force sk -> ps message


@@ -9,7 +9,7 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
env.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
endpoint = env.endpoints.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]


@@ -150,7 +150,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline
)
@@ -643,9 +643,7 @@ def test_upgrade_generationless_local_file_paths(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
workload = Workload(env, tenant_id, timeline_id)
workload.init()


@@ -42,7 +42,7 @@ async def run_worker_for_tenant(
async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> Tuple[TenantId, TimelineId, Lsn]:
tenant, timeline = env.neon_cli.create_tenant(conf=tenant_conf)
tenant, timeline = env.create_tenant(conf=tenant_conf)
last_flush_lsn = await run_worker_for_tenant(env, entries, tenant)
return tenant, timeline, last_flush_lsn


@@ -14,7 +14,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
env.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
@@ -46,7 +46,7 @@ def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
# Test handling errors during page server reconnect
def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_reconnect")
env.create_branch("test_pageserver_reconnect")
endpoint = env.endpoints.create_start("test_pageserver_reconnect")
con = endpoint.connect()


@@ -169,7 +169,7 @@ def test_pageserver_chaos(
# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"checkpoint_distance": "5000000",
}


@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# running.
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
env.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_restarts = 10
scale = 10


@@ -650,7 +650,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_a = TimelineId.generate()
timeline_b = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_a,
placement_policy='{"Attached":1}',
@@ -658,7 +658,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
# to trigger the upload promptly.
conf={"heatmap_period": f"{upload_period_secs}s"},
)
env.neon_cli.create_timeline("main2", tenant_id, timeline_b)
env.create_timeline("main2", tenant_id, timeline_b)
tenant_timelines[tenant_id] = [timeline_a, timeline_b]
@@ -778,9 +778,7 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
ps_attached = env.get_pageserver(attached_to_id)


@@ -57,7 +57,7 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
env.create_branch("test_pitr_gc_hundred", ancestor_branch_name="main", ancestor_start_lsn=lsn_a)
endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")


@@ -25,7 +25,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
)
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")
env.create_branch("test_pageserver_recovery", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_pageserver_recovery")


@@ -230,7 +230,7 @@ def test_remote_storage_upload_queue_retries(
# create tenant with config that will deterministically allow
# compaction and gc
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{64 * 1024}",
@@ -640,7 +640,9 @@ def test_empty_branch_remote_storage_upload(neon_env_builder: NeonEnvBuilder):
client = env.pageserver.http_client()
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
new_branch_timeline_id = env.create_branch(
new_branch_name, ancestor_branch_name="main", tenant_id=env.initial_tenant
)
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
timelines_before_detach = set(


@@ -60,9 +60,7 @@ def test_tenant_s3_restore(
last_flush_lsns = []
for timeline in ["first", "second"]:
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
endpoint.safe_psql(f"CREATE TABLE created_{timeline}(id integer);")


@@ -77,7 +77,7 @@ def test_sharding_smoke(
assert all(s < expect_initdb_size // 2 for s in sizes.values())
# Test that timeline creation works on a sharded tenant
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
# Test that we can write data to a sharded tenant
workload = Workload(env, tenant_id, timeline_b, branch_name="branch_b")
@@ -378,7 +378,7 @@ def test_sharding_split_smoke(
env.start()
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -1127,7 +1127,7 @@ def test_sharding_split_failures(
timeline_id = TimelineId.generate()
# Create a tenant with secondary locations enabled
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id, timeline_id, shard_count=initial_shard_count, placement_policy='{"Attached":1}'
)
@@ -1441,7 +1441,7 @@ def test_sharding_unlogged_relation(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=8)
env.create_tenant(tenant_id, timeline_id, shard_count=8)
# We will create many tables to ensure it's overwhelmingly likely that at least one
# of them doesn't land on shard 0
@@ -1483,7 +1483,7 @@ def test_top_tenants(neon_env_builder: NeonEnvBuilder):
for i in range(0, n_tenants):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
# Write a different amount of data to each tenant
w = Workload(env, tenant_id, timeline_id)
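A hypothetical sketch of sharded tenant creation through the new helper; the shard count, placement policy, and the Workload import path are assumptions based on the surrounding hunks:

from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv
from fixtures.workload import Workload  # import path assumed

def sketch_sharded_tenant(env: NeonEnv):
    tenant_id = TenantId.generate()
    timeline_id = TimelineId.generate()
    # One call creates the tenant, its initial timeline, and the shard layout.
    env.create_tenant(tenant_id, timeline_id, shard_count=8, placement_policy='{"Attached":1}')
    workload = Workload(env, tenant_id, timeline_id)
    workload.init()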


@@ -96,7 +96,7 @@ def test_storage_controller_smoke(
# Creating several tenants should spread out across the pageservers
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
env.create_tenant(tid, shard_count=shards_per_tenant)
# Repeating a creation should be idempotent (we are just testing it doesn't return an error)
env.storage_controller.tenant_create(
@@ -172,7 +172,7 @@ def test_storage_controller_smoke(
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
env.create_tenant(tid, shard_count=shards_per_tenant)
counts = get_node_shard_counts(env, tenant_ids)
# Nothing should have been scheduled on the node in Draining
@@ -806,10 +806,7 @@ def test_storage_controller_s3_time_travel_recovery(
env.storage_controller.consistency_check()
branch_name = "main"
timeline_id = env.neon_cli.create_timeline(
branch_name,
tenant_id=tenant_id,
)
timeline_id = env.create_timeline(branch_name, tenant_id=tenant_id)
# Write some nontrivial amount of data into the endpoint and wait until it is uploaded
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1009,9 +1006,7 @@ def test_storage_controller_tenant_deletion(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}')
# Ensure all the locations are configured, including secondaries
env.storage_controller.reconcile_until_idle()
@@ -1217,10 +1212,7 @@ def test_storage_controller_heartbeats(
env.storage_controller.tenant_create(tid)
branch_name = "main"
env.neon_cli.create_timeline(
branch_name,
tenant_id=tid,
)
env.create_timeline(branch_name, tenant_id=tid)
with env.endpoints.create_start("main", tenant_id=tid) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1322,9 +1314,9 @@ def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder):
# We'll have two tenants.
tenant_a = TenantId.generate()
env.neon_cli.create_tenant(tenant_a, placement_policy='{"Attached":1}')
env.create_tenant(tenant_a, placement_policy='{"Attached":1}')
tenant_b = TenantId.generate()
env.neon_cli.create_tenant(tenant_b, placement_policy='{"Attached":1}')
env.create_tenant(tenant_b, placement_policy='{"Attached":1}')
# Each pageserver will have one attached and one secondary location
env.storage_controller.tenant_shard_migrate(
@@ -1647,7 +1639,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
# Create a second timeline to ensure that import finds both
timeline_a = env.initial_timeline
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
workload_a = Workload(env, tenant_id, timeline_a, branch_name="main")
workload_a.init()
@@ -1689,7 +1681,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
)
# Now import it again
env.neon_cli.import_tenant(tenant_id)
env.neon_cli.tenant_import(tenant_id)
# Check we found the shards
describe = env.storage_controller.tenant_describe(tenant_id)
@@ -1731,7 +1723,7 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1818,7 +1810,7 @@ def test_skip_drain_on_secondary_lag(neon_env_builder: NeonEnvBuilder, pg_bin: P
env = neon_env_builder.init_configs()
env.start()
tid, timeline_id = env.neon_cli.create_tenant(placement_policy='{"Attached":1}')
tid, timeline_id = env.create_tenant(placement_policy='{"Attached":1}')
# Give things a chance to settle.
env.storage_controller.reconcile_until_idle(timeout_secs=30)
@@ -1924,7 +1916,7 @@ def test_background_operation_cancellation(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1984,7 +1976,7 @@ def test_storage_controller_node_deletion(
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -2109,7 +2101,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant with unhealthy scrubber scan result
tenant_b, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_b, _ = env.create_tenant(shard_count=shard_count)
tenant_b_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_b, shard_count=shard_count)
if shard_count is not None
@@ -2117,7 +2109,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant that never gets a health update from scrubber
tenant_c, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_c, _ = env.create_tenant(shard_count=shard_count)
tenant_c_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_c, shard_count=shard_count)
@@ -2517,7 +2509,7 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
env.storage_controller.pageserver_api().set_tenant_config(tenant_id, TENANT_CONF)
# Write enough data that a compaction would do some work (deleting some L0s)
@@ -2652,7 +2644,7 @@ def test_storage_controller_proxy_during_migration(
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
# The test stalls a reconcile on purpose to check if the long running
# reconcile alert fires.
@@ -2831,7 +2823,7 @@ def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
# Generate a layer to avoid shard split handling on ps from tripping
# up on debug assert.
timeline_id = TimelineId.generate()
env.neon_cli.create_timeline("bar", tids[0], timeline_id)
env.create_timeline("bar", tids[0], timeline_id)
workload = Workload(env, tids[0], timeline_id, branch_name="bar")
workload.init()
@@ -2919,3 +2911,97 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise
@run_only_on_default_postgres("Postgres version makes no difference here")
@pytest.mark.parametrize(
"migration_failpoint",
[
MigrationFailpoints.PRE_GENERATION_INC,
MigrationFailpoints.POST_NOTIFY,
MigrationFailpoints.POST_DETACH,
],
)
def test_multi_attached_timeline_creation(neon_env_builder: NeonEnvBuilder, migration_failpoint):
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_configs()
env.start()
tenant_id = TenantId.generate()
env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})
shard_zero = TenantShardId(tenant_id, 0, 0)
locations = env.storage_controller.get_tenants_placement()[str(shard_zero)]
assert locations["observed"] == locations["intent"]
assert locations["observed"]["attached"] is not None
assert len(locations["observed"]["secondary"]) > 0
attached_location = locations["observed"]["attached"]
secondary_location = locations["observed"]["secondary"][0]
env.storage_controller.configure_failpoints((migration_failpoint.value, "pause"))
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
migrate_fut = executor.submit(
env.storage_controller.tenant_shard_migrate,
shard_zero,
secondary_location,
)
def has_hit_migration_failpoint():
expr = f"at failpoint {migration_failpoint.value}"
log.info(expr)
assert env.storage_controller.log_contains(expr)
wait_until(10, 1, has_hit_migration_failpoint)
timeline_id = TimelineId.generate()
env.storage_controller.pageserver_api().timeline_create(
pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
)
# Timeline creation only goes to the origin.
if migration_failpoint == MigrationFailpoints.PRE_GENERATION_INC:
client = env.get_pageserver(attached_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(secondary_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
# Timeline creation goes to both attached locations
if migration_failpoint == MigrationFailpoints.POST_NOTIFY:
for node_id in [attached_location, secondary_location]:
client = env.get_pageserver(node_id).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {node_id}"
# Timeline creation goes to both locations, but storcon gets a 404 from the origin
# which it ignores.
if migration_failpoint == MigrationFailpoints.POST_DETACH:
client = env.get_pageserver(secondary_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
# Eventually migration completes
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
migrate_fut.result()
# Ensure that we detached from the old attached location
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
except:
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise


@@ -135,7 +135,7 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
env.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
workload = Workload(env, tenant_id, timeline_id)
workload.init()
@@ -185,7 +185,7 @@ def test_scrubber_physical_gc_ancestors(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -303,7 +303,7 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=None,
@@ -385,7 +385,7 @@ def test_scrubber_physical_gc_ancestors_split(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
initial_shard_count = 2
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=initial_shard_count,


@@ -9,11 +9,11 @@ from fixtures.utils import wait_until
# It requires tracking information about replication origins on the pageserver side
def test_subscriber_restart(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("publisher")
env.create_branch("publisher")
pub = env.endpoints.create("publisher")
pub.start()
sub_timeline_id = env.neon_cli.create_branch("subscriber")
sub_timeline_id = env.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()


@@ -38,7 +38,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# Check that we raise on misspelled configs
invalid_conf_key = "some_invalid_setting_name_blah_blah_123"
try:
env.neon_cli.create_tenant(
env.create_tenant(
conf={
invalid_conf_key: "20000",
}
@@ -54,9 +54,9 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
"evictions_low_residence_duration_metric_threshold": "42s",
"eviction_policy": json.dumps({"kind": "NoEviction"}),
}
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)
tenant, _ = env.create_tenant(conf=new_conf)
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
env.create_timeline("test_tenant_conf", tenant_id=tenant)
env.endpoints.create_start("test_tenant_conf", "main", tenant)
# check the configuration of the default tenant
@@ -121,10 +121,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
),
"max_lsn_wal_lag": "13000000",
}
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=conf_update,
)
env.config_tenant(tenant_id=tenant, conf=conf_update)
updated_tenant_config = http_client.tenant_config(tenant_id=tenant)
updated_specific_config = updated_tenant_config.tenant_specific_overrides
@@ -172,10 +169,8 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
final_conf = {
"pitr_interval": "1 min",
}
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=final_conf,
)
env.config_tenant(tenant_id=tenant, conf=final_conf)
final_tenant_config = http_client.tenant_config(tenant_id=tenant)
final_specific_config = final_tenant_config.tenant_specific_overrides
assert final_specific_config["pitr_interval"] == "1m"
@@ -218,7 +213,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
# tenant is created with defaults, as in without config file
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
(tenant_id, timeline_id) = env.create_tenant()
config_path = env.pageserver.tenant_dir(tenant_id) / "config-v1"
http_client = env.pageserver.http_client()
@@ -240,9 +235,9 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
func=lambda: assert_tenant_state(http_client, tenant_id, "Active"),
)
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"})
env.config_tenant(tenant_id, {"gc_horizon": "1000000"})
contents_first = config_path.read_text()
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "0"})
env.config_tenant(tenant_id, {"gc_horizon": "0"})
contents_later = config_path.read_text()
# don't test applying the setting here, we have another test case to show that
@@ -298,7 +293,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric = get_metric()
assert int(metric.value) > 0, "metric is updated"
env.neon_cli.config_tenant(
env.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": default_value}
)
updated_metric = get_metric()
@@ -306,9 +301,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric.value
), "metric is unchanged when setting same value"
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"}
)
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) == 0
@@ -320,9 +313,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) > 0
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"}
)
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) == 0, "value resets if label changes"
@@ -334,7 +325,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) > 0, "set a non-zero value for next step"
env.neon_cli.config_tenant(tenant_id, {})
env.config_tenant(tenant_id, {})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default"
assert int(metric.value) == 0, "value resets to default"


@@ -78,7 +78,7 @@ def test_tenant_delete_smoke(
# may need to retry on some remote storage errors injected by the test harness
error_tolerant_delete(ps_http, tenant_id)
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id=tenant_id,
conf=many_small_layers_tenant_config(),
)
@@ -89,9 +89,7 @@ def test_tenant_delete_smoke(
# create two timelines, one being the parent of the other
parent = None
for timeline in ["first", "second"]:
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
@@ -339,7 +337,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, make_httpserver, neon_env_builder
ps_http = env.pageserver.http_client()
# create a tenant separate from the main tenant so that we have one remaining
# after we deleted it, as the scrubber treats empty buckets as an error.
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
(tenant_id, timeline_id) = env.create_tenant()
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())

Some files were not shown because too many files have changed in this diff.