pq client: renames

implement a benchmark for tokio tcp handling to figure out perf bottleneck in no_op libpq benchmark
implement a standalone no-op server
2026-05-23 16:10:37 +00:00 · 2023-11-08 16:53:06 +00:00 · 2023-11-08 16:49:03 +00:00 · 2023-11-08 16:49:03 +00:00 · 2023-11-08 16:49:03 +00:00 · 2023-11-08 16:49:03 +00:00
57 changed files with 1759 additions and 796 deletions
--- a/.github/ISSUE_TEMPLATE/epic-template.md
+++ b/.github/ISSUE_TEMPLATE/epic-template.md
@@ -17,9 +17,8 @@ assignees: ''
 ## Implementation ideas


-```[tasklist]
-### Tasks
-```
+## Tasks
+- [ ]


 ## Other related tasks and Epics
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2927,6 +2927,16 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "nu-ansi-term"
+version = "0.46.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
+dependencies = [
+ "overload",
+ "winapi",
+]
+
 [[package]]
 name = "num-bigint"
 version = "0.4.3"
@@ -3193,6 +3203,12 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"

+[[package]]
+name = "overload"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
+
 [[package]]
 name = "pagectl"
 version = "0.1.0"
@@ -3278,10 +3294,12 @@ dependencies = [
 "tokio",
 "tokio-io-timeout",
 "tokio-postgres",
+ "tokio-stream",
 "tokio-tar",
 "tokio-util",
 "toml_edit",
 "tracing",
+ "tracing-subscriber",
 "url",
 "utils",
 "walkdir",
@@ -3556,7 +3574,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -3569,7 +3587,7 @@ dependencies = [
 [[package]]
 name = "postgres-native-tls"
 version = "0.5.0"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
 dependencies = [
 "native-tls",
 "tokio",
@@ -3580,7 +3598,7 @@ dependencies = [
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -3598,7 +3616,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -4064,7 +4082,6 @@ dependencies = [
 "aws-config",
 "aws-credential-types",
 "aws-sdk-s3",
- "aws-smithy-async",
 "aws-smithy-http",
 "aws-types",
 "azure_core",
@@ -5415,7 +5432,7 @@ dependencies = [
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
 dependencies = [
 "async-trait",
 "byteorder",
@@ -5772,6 +5789,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
 dependencies = [
 "matchers",
+ "nu-ansi-term",
 "once_cell",
 "regex",
 "serde",
@@ -6483,7 +6501,6 @@ dependencies = [
 "clap",
 "clap_builder",
 "crossbeam-utils",
- "dashmap",
 "either",
 "fail",
 "futures",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,7 +48,6 @@ async-trait = "0.1"
 aws-config = { version = "0.56", default-features = false, features=["rustls"] }
 aws-sdk-s3 = "0.29"
 aws-smithy-http = "0.56"
-aws-smithy-async = { version = "0.56", default-features = false, features=["rt-tokio"] }
 aws-credential-types = "0.56"
 aws-types = "0.56"
 axum = { version = "0.6.20", features = ["ws"] }
@@ -67,7 +66,7 @@ comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-utils = "0.8.5"
-dashmap = { version = "5.5.0", features = ["raw-api"] }
+dashmap = "5.5.0"
 either = "1.8"
 enum-map = "2.4.2"
 enumset = "1.0.12"
@@ -164,11 +163,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
-postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }

 ## Other git libraries
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
@@ -205,7 +204,7 @@ tonic-build = "0.9"

 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }

 ################# Binary contents sections

--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -78,7 +78,7 @@ use regex::Regex;
 use remote_storage::*;
 use serde_json;
 use std::io::Read;
-use std::num::NonZeroUsize;
+use std::num::{NonZeroU32, NonZeroUsize};
 use std::path::Path;
 use std::str;
 use tar::Archive;
@@ -281,6 +281,8 @@ pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRem
        max_keys_per_list_response: None,
    };
    let config = RemoteStorageConfig {
+        max_concurrent_syncs: NonZeroUsize::new(100).expect("100 != 0"),
+        max_sync_errors: NonZeroU32::new(100).expect("100 != 0"),
        storage: RemoteStorageKind::AwsS3(config),
    };
    GenericRemoteStorage::from_config(&config)
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -18,7 +18,7 @@ use utils::{

 use crate::reltag::RelTag;
 use anyhow::bail;
-use bytes::{BufMut, Bytes, BytesMut};
+use bytes::{Buf, BufMut, Bytes, BytesMut};

 /// The state of a tenant in this pageserver.
 ///
@@ -572,15 +572,18 @@ pub enum PagestreamFeMessage {
    Nblocks(PagestreamNblocksRequest),
    GetPage(PagestreamGetPageRequest),
    DbSize(PagestreamDbSizeRequest),
+    NoOp,
 }

 // Wrapped in libpq CopyData
+#[derive(Debug)]
 pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
    GetPage(PagestreamGetPageResponse),
    Error(PagestreamErrorResponse),
    DbSize(PagestreamDbSizeResponse),
+    NoOp,
 }

 #[derive(Debug, PartialEq, Eq)]
@@ -679,6 +682,10 @@ impl PagestreamFeMessage {
                bytes.put_u64(req.lsn.0);
                bytes.put_u32(req.dbnode);
            }
+
+            Self::NoOp => {
+                bytes.put_u8(4);
+            }
        }

        bytes.into()
@@ -729,6 +736,7 @@ impl PagestreamFeMessage {
                lsn: Lsn::from(body.read_u64::<BigEndian>()?),
                dbnode: body.read_u32::<BigEndian>()?,
            })),
+            4 => Ok(PagestreamFeMessage::NoOp),
            _ => bail!("unknown smgr message tag: {:?}", msg_tag),
        }
    }
@@ -763,10 +771,46 @@ impl PagestreamBeMessage {
                bytes.put_u8(104); /* tag from pagestore_client.h */
                bytes.put_i64(resp.db_size);
            }
+            Self::NoOp => {
+                bytes.put_u8(105);
+            }
        }

        bytes.into()
    }
+
+    pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
+        let mut buf = buf.reader();
+        let msg_tag = buf.read_u8()?;
+        match msg_tag {
+            100 => todo!(),
+            101 => todo!(),
+            102 => {
+                let buf = buf.get_ref();
+                /* TODO use constant */
+                if buf.len() == 8192 {
+                    Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
+                        page: buf.clone(),
+                    }))
+                } else {
+                    anyhow::bail!("invalid page size: {}", buf.len());
+                }
+            }
+            103 => {
+                let buf = buf.get_ref();
+                let cstr = std::ffi::CStr::from_bytes_until_nul(buf)?;
+                let rust_str = cstr.to_str()?;
+                Ok(PagestreamBeMessage::Error(PagestreamErrorResponse {
+                    message: rust_str.to_owned(),
+                }))
+            }
+            104 => todo!(),
+            105 => {
+                Ok(PagestreamBeMessage::NoOp)
+            },
+            _ => bail!("unknown tag: {:?}", msg_tag),
+        }
+    }
 }

 #[cfg(test)]
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -17,7 +17,7 @@ use std::{fmt, io};
 use std::{future::Future, str::FromStr};
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_rustls::TlsAcceptor;
-use tracing::{debug, error, info, trace, warn};
+use tracing::{debug, error, info, trace};

 use pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter};
 use pq_proto::{
@@ -35,11 +35,6 @@ pub enum QueryError {
    /// We were instructed to shutdown while processing the query
    #[error("Shutting down")]
    Shutdown,
-    /// Authentication failure
-    #[error("Unauthorized: {0}")]
-    Unauthorized(std::borrow::Cow<'static, str>),
-    #[error("Simulated Connection Error")]
-    SimulatedConnectionError,
    /// Some other error
    #[error(transparent)]
    Other(#[from] anyhow::Error),
@@ -54,9 +49,8 @@ impl From<io::Error> for QueryError {
 impl QueryError {
    pub fn pg_error_code(&self) -> &'static [u8; 5] {
        match self {
-            Self::Disconnected(_) | Self::SimulatedConnectionError => b"08006", // connection failure
+            Self::Disconnected(_) => b"08006", // connection failure
            Self::Shutdown => SQLSTATE_ADMIN_SHUTDOWN,
-            Self::Unauthorized(_) => SQLSTATE_INTERNAL_ERROR,
            Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error
        }
    }
@@ -616,7 +610,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {

                    if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
                        self.write_message_noflush(&BeMessage::ErrorResponse(
-                            &short_error(&e),
+                            &e.to_string(),
                            Some(e.pg_error_code()),
                        ))?;
                        return Err(e);
@@ -738,9 +732,6 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
                if let Err(e) = handler.process_query(self, query_string).await {
                    match e {
                        QueryError::Shutdown => return Ok(ProcessMsgResult::Break),
-                        QueryError::SimulatedConnectionError => {
-                            return Err(QueryError::SimulatedConnectionError)
-                        }
                        e => {
                            log_query_error(query_string, &e);
                            let short_error = short_error(&e);
@@ -975,8 +966,6 @@ pub fn short_error(e: &QueryError) -> String {
    match e {
        QueryError::Disconnected(connection_error) => connection_error.to_string(),
        QueryError::Shutdown => "shutdown".to_string(),
-        QueryError::Unauthorized(_e) => "JWT authentication error".to_string(),
-        QueryError::SimulatedConnectionError => "simulated connection error".to_string(),
        QueryError::Other(e) => format!("{e:#}"),
    }
 }
@@ -993,15 +982,9 @@ fn log_query_error(query: &str, e: &QueryError) {
        QueryError::Disconnected(other_connection_error) => {
            error!("query handler for '{query}' failed with connection error: {other_connection_error:?}")
        }
-        QueryError::SimulatedConnectionError => {
-            error!("query handler for query '{query}' failed due to a simulated connection error")
-        }
        QueryError::Shutdown => {
            info!("query handler for '{query}' cancelled during tenant shutdown")
        }
-        QueryError::Unauthorized(e) => {
-            warn!("query handler for '{query}' failed with authentication error: {e}");
-        }
        QueryError::Other(e) => {
            error!("query handler for '{query}' failed: {e:?}");
        }
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -8,7 +8,6 @@ license.workspace = true
 anyhow.workspace = true
 async-trait.workspace = true
 once_cell.workspace = true
-aws-smithy-async.workspace = true
 aws-smithy-http.workspace = true
 aws-types.workspace = true
 aws-config.workspace = true
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -14,7 +14,13 @@ mod local_fs;
 mod s3_bucket;
 mod simulate_failures;

-use std::{collections::HashMap, fmt::Debug, num::NonZeroUsize, pin::Pin, sync::Arc};
+use std::{
+    collections::HashMap,
+    fmt::Debug,
+    num::{NonZeroU32, NonZeroUsize},
+    pin::Pin,
+    sync::Arc,
+};

 use anyhow::{bail, Context};
 use camino::{Utf8Path, Utf8PathBuf};
@@ -30,6 +36,12 @@ pub use self::{
 };
 use s3_bucket::RequestKind;

+/// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
+/// During regular work, pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
+/// during start (where local and remote timelines are compared and initial sync tasks are scheduled) and timeline attach.
+/// Both cases may trigger timeline download, that might download a lot of layers. This concurrency is limited by the clients internally, if needed.
+pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS: usize = 50;
+pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
 /// Currently, sync happens with AWS S3, that has two limits on requests per second:
 /// ~200 RPS for IAM services
 /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
@@ -102,7 +114,7 @@ impl RemotePath {
        self.0.file_name()
    }

-    pub fn join(&self, segment: &Utf8Path) -> Self {
+    pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
        Self(self.0.join(segment))
    }

@@ -431,6 +443,10 @@ pub struct StorageMetadata(HashMap<String, String>);
 /// External backup storage configuration, enough for creating a client for that storage.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct RemoteStorageConfig {
+    /// Max allowed number of concurrent sync operations between the API user and the remote storage.
+    pub max_concurrent_syncs: NonZeroUsize,
+    /// Max allowed errors before the sync task is considered failed and evicted.
+    pub max_sync_errors: NonZeroU32,
    /// The storage connection configuration.
    pub storage: RemoteStorageKind,
 }
@@ -526,6 +542,18 @@ impl RemoteStorageConfig {

        let use_azure = container_name.is_some() && container_region.is_some();

+        let max_concurrent_syncs = NonZeroUsize::new(
+            parse_optional_integer("max_concurrent_syncs", toml)?
+                .unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
+        )
+        .context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
+
+        let max_sync_errors = NonZeroU32::new(
+            parse_optional_integer("max_sync_errors", toml)?
+                .unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
+        )
+        .context("Failed to parse 'max_sync_errors' as a positive integer")?;
+
        let default_concurrency_limit = if use_azure {
            DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT
        } else {
@@ -607,7 +635,11 @@ impl RemoteStorageConfig {
            }
        };

-        Ok(Some(RemoteStorageConfig { storage }))
+        Ok(Some(RemoteStorageConfig {
+            max_concurrent_syncs,
+            max_sync_errors,
+            storage,
+        }))
    }
 }

--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -4,27 +4,23 @@
 //! allowing multiple api users to independently work with the same S3 bucket, if
 //! their bucket prefixes are both specified and different.

-use std::{borrow::Cow, sync::Arc};
+use std::borrow::Cow;

 use anyhow::Context;
 use aws_config::{
    environment::credentials::EnvironmentVariableCredentialsProvider,
-    imds::credentials::ImdsCredentialsProvider,
-    meta::credentials::CredentialsProviderChain,
-    provider_config::ProviderConfig,
-    retry::{RetryConfigBuilder, RetryMode},
-    web_identity_token::WebIdentityTokenCredentialsProvider,
+    imds::credentials::ImdsCredentialsProvider, meta::credentials::CredentialsProviderChain,
+    provider_config::ProviderConfig, web_identity_token::WebIdentityTokenCredentialsProvider,
 };
 use aws_credential_types::cache::CredentialsCache;
 use aws_sdk_s3::{
-    config::{AsyncSleep, Config, Region, SharedAsyncSleep},
+    config::{Config, Region},
    error::SdkError,
    operation::get_object::GetObjectError,
    primitives::ByteStream,
    types::{Delete, ObjectIdentifier},
    Client,
 };
-use aws_smithy_async::rt::sleep::TokioSleep;
 use aws_smithy_http::body::SdkBody;
 use hyper::Body;
 use scopeguard::ScopeGuard;
@@ -87,23 +83,10 @@ impl S3Bucket {
            .or_else("imds", ImdsCredentialsProvider::builder().build())
        };

-        // AWS SDK requires us to specify how the RetryConfig should sleep when it wants to back off
-        let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());
-
-        // We do our own retries (see [`backoff::retry`]).  However, for the AWS SDK to enable rate limiting in response to throttling
-        // responses (e.g. 429 on too many ListObjectsv2 requests), we must provide a retry config.  We set it to use at most one
-        // attempt, and enable 'Adaptive' mode, which causes rate limiting to be enabled.
-        let mut retry_config = RetryConfigBuilder::new();
-        retry_config
-            .set_max_attempts(Some(1))
-            .set_mode(Some(RetryMode::Adaptive));
-
        let mut config_builder = Config::builder()
            .region(region)
            .credentials_cache(CredentialsCache::lazy())
-            .credentials_provider(credentials_provider)
-            .sleep_impl(SharedAsyncSleep::from(sleep_impl))
-            .retry_config(retry_config.build());
+            .credentials_provider(credentials_provider);

        if let Some(custom_endpoint) = aws_config.endpoint.clone() {
            config_builder = config_builder
--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -1,6 +1,6 @@
 use std::collections::HashSet;
 use std::env;
-use std::num::NonZeroUsize;
+use std::num::{NonZeroU32, NonZeroUsize};
 use std::ops::ControlFlow;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -469,6 +469,8 @@ fn create_azure_client(
    let random = rand::thread_rng().gen::<u32>();

    let remote_storage_config = RemoteStorageConfig {
+        max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
+        max_sync_errors: NonZeroU32::new(5).unwrap(),
        storage: RemoteStorageKind::AzureContainer(AzureConfig {
            container_name: remote_storage_azure_container,
            container_region: remote_storage_azure_region,
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -1,6 +1,6 @@
 use std::collections::HashSet;
 use std::env;
-use std::num::NonZeroUsize;
+use std::num::{NonZeroU32, NonZeroUsize};
 use std::ops::ControlFlow;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -396,6 +396,8 @@ fn create_s3_client(
    let random = rand::thread_rng().gen::<u32>();

    let remote_storage_config = RemoteStorageConfig {
+        max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
+        max_sync_errors: NonZeroU32::new(5).unwrap(),
        storage: RemoteStorageKind::AwsS3(S3Config {
            bucket_name: remote_storage_s3_bucket,
            bucket_region: remote_storage_s3_region,
--- a/libs/utils/src/auth.rs
+++ b/libs/utils/src/auth.rs
@@ -2,7 +2,7 @@

 use arc_swap::ArcSwap;
 use serde;
-use std::{borrow::Cow, fmt::Display, fs, sync::Arc};
+use std::{fs, sync::Arc};

 use anyhow::Result;
 use camino::Utf8Path;
@@ -11,7 +11,7 @@ use jsonwebtoken::{
 };
 use serde::{Deserialize, Serialize};

-use crate::{http::error::ApiError, id::TenantId};
+use crate::id::TenantId;

 /// Algorithm to use. We require EdDSA.
 const STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA;
@@ -54,7 +54,7 @@ impl SwappableJwtAuth {
    pub fn swap(&self, jwt_auth: JwtAuth) {
        self.0.swap(Arc::new(jwt_auth));
    }
-    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
+    pub fn decode(&self, token: &str) -> Result<TokenData<Claims>> {
        self.0.load().decode(token)
    }
 }
@@ -65,24 +65,6 @@ impl std::fmt::Debug for SwappableJwtAuth {
    }
 }

-#[derive(Clone, PartialEq, Eq, Hash, Debug)]
-pub struct AuthError(pub Cow<'static, str>);
-
-impl Display for AuthError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl From<AuthError> for ApiError {
-    fn from(_value: AuthError) -> Self {
-        // Don't pass on the value of the AuthError as a precautionary measure.
-        // Being intentionally vague in public error communication hurts debugability
-        // but it is more secure.
-        ApiError::Forbidden("JWT authentication error".to_string())
-    }
-}
-
 pub struct JwtAuth {
    decoding_keys: Vec<DecodingKey>,
    validation: Validation,
@@ -132,7 +114,7 @@ impl JwtAuth {
    /// The function tries the stored decoding keys in succession,
    /// and returns the first yielding a successful result.
    /// If there is no working decoding key, it returns the last error.
-    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
+    pub fn decode(&self, token: &str) -> Result<TokenData<Claims>> {
        let mut res = None;
        for decoding_key in &self.decoding_keys {
            res = Some(decode(token, decoding_key, &self.validation));
@@ -141,9 +123,9 @@ impl JwtAuth {
            }
        }
        if let Some(res) = res {
-            res.map_err(|e| AuthError(Cow::Owned(e.to_string())))
+            res.map_err(anyhow::Error::new)
        } else {
-            Err(AuthError(Cow::Borrowed("no JWT decoding keys configured")))
+            anyhow::bail!("no JWT decoding keys configured")
        }
    }
 }
@@ -184,9 +166,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
 "#;

    #[test]
-    fn test_decode() {
+    fn test_decode() -> Result<(), anyhow::Error> {
        let expected_claims = Claims {
-            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
+            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?),
            scope: Scope::Tenant,
        };

@@ -205,24 +187,28 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
        let encoded_eddsa = "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6InRlbmFudCIsInRlbmFudF9pZCI6IjNkMWY3NTk1YjQ2ODIzMDMwNGUwYjczY2VjYmNiMDgxIiwiaXNzIjoibmVvbi5jb250cm9scGxhbmUiLCJleHAiOjE3MDkyMDA4NzksImlhdCI6MTY3ODQ0MjQ3OX0.U3eA8j-uU-JnhzeO3EDHRuXLwkAUFCPxtGHEgw6p7Ccc3YRbFs2tmCdbD9PZEXP-XsxSeBQi1FY0YPcT3NXADw";

        // Check it can be validated with the public key
-        let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]);
-        let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims;
+        let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]);
+        let claims_from_token = auth.decode(encoded_eddsa)?.claims;
        assert_eq!(claims_from_token, expected_claims);
+
+        Ok(())
    }

    #[test]
-    fn test_encode() {
+    fn test_encode() -> Result<(), anyhow::Error> {
        let claims = Claims {
-            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
+            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?),
            scope: Scope::Tenant,
        };

-        let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap();
+        let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519)?;

        // decode it back
-        let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]);
-        let decoded = auth.decode(&encoded).unwrap();
+        let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]);
+        let decoded = auth.decode(&encoded)?;

        assert_eq!(decoded.claims, claims);
+
+        Ok(())
    }
 }
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -1,4 +1,4 @@
-use crate::auth::{AuthError, Claims, SwappableJwtAuth};
+use crate::auth::{Claims, SwappableJwtAuth};
 use crate::http::error::{api_error_handler, route_error_handler, ApiError};
 use anyhow::Context;
 use hyper::header::{HeaderName, AUTHORIZATION};
@@ -400,11 +400,9 @@ pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
                    })?;
                    let token = parse_token(header_value)?;

-                    let data = auth.decode(token).map_err(|err| {
-                        warn!("Authentication error: {err}");
-                        // Rely on From<AuthError> for ApiError impl
-                        err
-                    })?;
+                    let data = auth
+                        .decode(token)
+                        .map_err(|_| ApiError::Unauthorized("malformed jwt token".to_string()))?;
                    req.set_context(data.claims);
                }
                None => {
@@ -452,11 +450,12 @@ where

 pub fn check_permission_with(
    req: &Request<Body>,
-    check_permission: impl Fn(&Claims) -> Result<(), AuthError>,
+    check_permission: impl Fn(&Claims) -> Result<(), anyhow::Error>,
 ) -> Result<(), ApiError> {
    match req.context::<Claims>() {
-        Some(claims) => Ok(check_permission(&claims)
-            .map_err(|_err| ApiError::Forbidden("JWT authentication error".to_string()))?),
+        Some(claims) => {
+            Ok(check_permission(&claims).map_err(|err| ApiError::Forbidden(err.to_string()))?)
+        }
        None => Ok(()), // claims is None because auth is disabled
    }
 }
--- a/libs/utils/src/http/error.rs
+++ b/libs/utils/src/http/error.rs
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
 use std::borrow::Cow;
 use std::error::Error as StdError;
 use thiserror::Error;
-use tracing::{error, info, warn};
+use tracing::{error, info};

 #[derive(Debug, Error)]
 pub enum ApiError {
@@ -118,9 +118,6 @@ pub fn api_error_handler(api_error: ApiError) -> Response<Body> {
    // Print a stack trace for Internal Server errors

    match api_error {
-        ApiError::Forbidden(_) | ApiError::Unauthorized(_) => {
-            warn!("Error processing HTTP request: {api_error:#}")
-        }
        ApiError::ResourceUnavailable(_) => info!("Error processing HTTP request: {api_error:#}"),
        ApiError::NotFound(_) => info!("Error processing HTTP request: {api_error:#}"),
        ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"),
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -125,9 +125,6 @@ where
            // Wake everyone with an error.
            let mut internal = self.internal.lock().unwrap();

-            // Block any future waiters from starting
-            internal.shutdown = true;
-
            // This will steal the entire waiters map.
            // When we drop it all waiters will be woken.
            mem::take(&mut internal.waiters)
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -188,7 +188,6 @@ extern "C" fn recovery_download(
    }
 }

-#[allow(clippy::unnecessary_cast)]
 extern "C" fn wal_read(
    sk: *mut Safekeeper,
    buf: *mut ::std::os::raw::c_char,
@@ -422,7 +421,6 @@ impl std::fmt::Display for Level {
 }

 /// Take ownership of `Vec<u8>` from StringInfoData.
-#[allow(clippy::unnecessary_cast)]
 pub(crate) fn take_vec_u8(pg: &mut StringInfoData) -> Option<Vec<u8>> {
    if pg.data.is_null() {
        return None;
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -186,7 +186,7 @@ impl Wrapper {
            .unwrap()
            .into_bytes_with_nul();
        assert!(safekeepers_list_vec.len() == safekeepers_list_vec.capacity());
-        let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut std::ffi::c_char;
+        let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut i8;

        let callback_data = Box::into_raw(Box::new(api)) as *mut ::std::os::raw::c_void;

--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -82,6 +82,8 @@ enum-map.workspace = true
 enumset.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
+tokio-stream.workspace = true
+tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }

 [dev-dependencies]
 criterion.workspace = true
--- a/pageserver/src/auth.rs
+++ b/pageserver/src/auth.rs
@@ -1,21 +1,22 @@
-use utils::auth::{AuthError, Claims, Scope};
+use anyhow::{bail, Result};
+use utils::auth::{Claims, Scope};
 use utils::id::TenantId;

-pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<(), AuthError> {
+pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<()> {
    match (&claims.scope, tenant_id) {
-        (Scope::Tenant, None) => Err(AuthError(
-            "Attempt to access management api with tenant scope. Permission denied".into(),
-        )),
+        (Scope::Tenant, None) => {
+            bail!("Attempt to access management api with tenant scope. Permission denied")
+        }
        (Scope::Tenant, Some(tenant_id)) => {
            if claims.tenant_id.unwrap() != tenant_id {
-                return Err(AuthError("Tenant id mismatch. Permission denied".into()));
+                bail!("Tenant id mismatch. Permission denied")
            }
            Ok(())
        }
        (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
        (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
-        (Scope::SafekeeperData, _) => Err(AuthError(
-            "SafekeeperData scope makes no sense for Pageserver".into(),
-        )),
+        (Scope::SafekeeperData, _) => {
+            bail!("SafekeeperData scope makes no sense for Pageserver")
+        }
    }
 }
--- a/pageserver/src/bin/getpage_bench_http.rs
+++ b/pageserver/src/bin/getpage_bench_http.rs
@@ -0,0 +1,245 @@
+use clap::Parser;
+use hyper::client::conn::Parts;
+use hyper::client::HttpConnector;
+use hyper::{Body, Client, Uri};
+use pageserver::{repository, tenant};
+use rand::prelude::*;
+use std::env::args;
+use std::future::Future;
+use std::str::FromStr;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use tokio::sync::mpsc::{channel, Sender};
+use tokio::sync::Mutex as AsyncMutex;
+use tokio::task::JoinHandle;
+
+struct Key(repository::Key);
+
+impl std::str::FromStr for Key {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        repository::Key::from_hex(s).map(Key)
+    }
+}
+
+struct KeyRange {
+    start: Key,
+    end: Key,
+}
+
+impl KeyRange {
+    fn len(&self) -> i128 {
+        self.end.0.to_i128() - self.start.0.to_i128()
+    }
+}
+
+#[derive(clap::Parser)]
+struct Args {
+    #[clap(long, default_value = "http://localhost:9898")]
+    ps_endpoint: String,
+    // tenant_id: String,
+    // timeline_id: String,
+    num_tasks: usize,
+    num_requests: usize,
+    tenants: Option<Vec<String>>,
+    #[clap(long)]
+    pick_n_tenants: Option<usize>,
+}
+
+#[derive(Debug, Default)]
+struct Stats {
+    completed_requests: AtomicU64,
+}
+
+impl Stats {
+    fn inc(&self) {
+        self.completed_requests.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+#[tokio::main]
+async fn main() {
+    let args: &'static Args = Box::leak(Box::new(Args::parse()));
+
+    let client = Client::new();
+
+    let tenants = if let Some(tenants) = &args.tenants {
+        tenants.clone()
+    } else {
+        // let tenant_id = "b97965931096047b2d54958756baee7b";
+        // let timeline_id = "2868f84a8d166779e4c651b116c45059";
+
+        let resp = client
+            .get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
+            .await
+            .unwrap();
+
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        let mut out = Vec::new();
+        for t in tenants.as_array().unwrap() {
+            if let Some(limit) = args.pick_n_tenants {
+                if out.len() >= limit {
+                    break;
+                }
+            }
+            out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
+        }
+        if let Some(limit) = args.pick_n_tenants {
+            assert_eq!(out.len(), limit);
+        }
+        out
+    };
+
+    let mut tenant_timelines = Vec::new();
+    for tenant_id in tenants {
+        let resp = client
+            .get(
+                Uri::try_from(&format!(
+                    "{}/v1/tenant/{}/timeline",
+                    args.ps_endpoint, tenant_id
+                ))
+                .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        for t in timelines.as_array().unwrap() {
+            let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
+            tenant_timelines.push((tenant_id.clone(), timeline_id));
+        }
+    }
+    println!("tenant_timelines:\n{:?}", tenant_timelines);
+
+    let mut stats = Arc::new(Stats::default());
+
+    tokio::spawn({
+        let stats = Arc::clone(&stats);
+        async move {
+            loop {
+                let start = std::time::Instant::now();
+                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
+                let elapsed = start.elapsed();
+                println!(
+                    "RPS: {:.0}",
+                    completed_requests as f64 / elapsed.as_secs_f64()
+                );
+            }
+        }
+    });
+
+    let mut tasks = Vec::new();
+    for (tenant_id, timeline_id) in tenant_timelines {
+        let t = tokio::spawn(timeline(
+            args,
+            client.clone(),
+            tenant_id,
+            timeline_id,
+            Arc::clone(&stats),
+        ));
+        tasks.push(t);
+    }
+
+    for t in tasks {
+        t.await.unwrap();
+    }
+}
+
+fn timeline(
+    args: &'static Args,
+    client: Client<HttpConnector, Body>,
+    tenant_id: String,
+    timeline_id: String,
+    stats: Arc<Stats>,
+) -> impl Future<Output = ()> {
+    async move {
+        let mut resp = client
+            .get(
+                Uri::try_from(&format!(
+                    "{}/v1/tenant/{}/timeline/{}/keyspace",
+                    args.ps_endpoint, tenant_id, timeline_id
+                ))
+                .unwrap(),
+            )
+            .await
+            .unwrap();
+        if !resp.status().is_success() {
+            panic!("Failed to get keyspace: {resp:?}");
+        }
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
+
+        let lsn = Arc::new(keyspace["at_lsn"].as_str().unwrap().to_owned());
+
+        let ranges = keyspace["keys"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .map(|r| {
+                let r = r.as_array().unwrap();
+                assert_eq!(r.len(), 2);
+                let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
+                let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
+                KeyRange { start, end }
+            })
+            .collect::<Vec<_>>();
+
+        // weighted ranges
+        let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
+
+        let ranges = Arc::new(ranges);
+        let weights = Arc::new(weights);
+
+        let (tx, mut rx) = channel::<i32>(1000);
+        let tx = Arc::new(AsyncMutex::new(tx));
+
+        let mut tasks = Vec::<JoinHandle<()>>::new();
+
+        let start = std::time::Instant::now();
+
+        for i in 0..args.num_tasks {
+            let ranges = ranges.clone();
+            let weights = weights.clone();
+            let lsn = lsn.clone();
+            let client = client.clone();
+            let tenant_id = tenant_id.clone();
+            let timeline_id = timeline_id.clone();
+            let stats = Arc::clone(&stats);
+            let task = tokio::spawn(async move {
+                for i in 0..args.num_requests {
+                    let key = {
+                        let mut rng = rand::thread_rng();
+                        let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
+                        let key = rng.gen_range((r.start.0.to_i128()..r.end.0.to_i128()));
+                        key
+                    };
+                    let url = format!(
+                        "{}/v1/tenant/{}/timeline/{}/getpage?key={:036x}&lsn={}",
+                        args.ps_endpoint, tenant_id, timeline_id, key, lsn
+                    );
+                    let uri = url.parse::<Uri>().unwrap();
+                    let resp = client.get(uri).await.unwrap();
+                    stats.inc();
+                }
+            });
+            tasks.push(task);
+        }
+
+        drop(tx);
+
+        for task in tasks {
+            task.await.unwrap();
+        }
+
+        let elapsed = start.elapsed();
+        println!(
+            "RPS: {:.0}",
+            (args.num_requests * args.num_tasks) as f64 / elapsed.as_secs_f64()
+        );
+    }
+}
--- a/pageserver/src/bin/getpage_bench_libpq.rs
+++ b/pageserver/src/bin/getpage_bench_libpq.rs
@@ -0,0 +1,411 @@
+use anyhow::Context;
+use clap::Parser;
+use futures::{SinkExt, TryStreamExt};
+use hyper::client::conn::Parts;
+use hyper::client::HttpConnector;
+use hyper::{Client, Uri};
+use pageserver::page_cache::PAGE_SZ;
+use pageserver::pgdatadir_mapping::{is_rel_block_key, key_to_rel_block};
+use pageserver::{repository, tenant};
+use pageserver_api::models::{
+    PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
+};
+use pageserver_api::reltag::RelTag;
+use rand::prelude::*;
+use scopeguard::defer;
+use std::env::args;
+use std::future::Future;
+use std::str::FromStr;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use tokio::sync::mpsc::{channel, Sender};
+use tokio::sync::Mutex as AsyncMutex;
+use tokio::task::JoinHandle;
+use tokio_stream::{Stream, StreamExt};
+use utils::completion;
+use utils::lsn::Lsn;
+
+struct Key(repository::Key);
+
+impl std::str::FromStr for Key {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        repository::Key::from_hex(s).map(Key)
+    }
+}
+
+struct KeyRange {
+    start: i128,
+    end: i128,
+}
+
+impl KeyRange {
+    fn len(&self) -> i128 {
+        self.end - self.start
+    }
+}
+
+struct RelTagBlockNo {
+    rel_tag: RelTag,
+    block_no: u32,
+}
+
+#[derive(clap::Parser)]
+struct Args {
+    #[clap(long, default_value = "http://localhost:9898")]
+    mgmt_api_endpoint: String,
+    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
+    page_service_connstring: String,
+    // tenant_id: String,
+    // timeline_id: String,
+    num_tasks: usize,
+    num_requests: usize,
+    tenants: Option<Vec<String>>,
+    #[clap(long)]
+    pick_n_tenants: Option<usize>,
+    #[clap(subcommand)]
+    mode: Mode,
+}
+
+#[derive(clap::Parser, Clone)]
+enum Mode {
+    GetPage,
+    NoOp,
+}
+
+#[derive(Debug, Default)]
+struct Stats {
+    completed_requests: AtomicU64,
+}
+
+impl Stats {
+    fn inc(&self) {
+        self.completed_requests.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+#[tokio::main]
+async fn main() {
+    let args: &'static Args = Box::leak(Box::new(Args::parse()));
+
+    // std::env::set_var("RUST_LOG", "info,tokio_postgres=trace");
+    // tracing_subscriber::fmt::init();
+
+    let client = Client::new();
+
+    let tenants = if let Some(tenants) = &args.tenants {
+        tenants.clone()
+    } else {
+        // let tenant_id = "b97965931096047b2d54958756baee7b";
+        // let timeline_id = "2868f84a8d166779e4c651b116c45059";
+
+        let resp = client
+            .get(Uri::try_from(&format!("{}/v1/tenant", args.mgmt_api_endpoint)).unwrap())
+            .await
+            .unwrap();
+
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        let mut out = Vec::new();
+        for t in tenants.as_array().unwrap() {
+            if let Some(limit) = args.pick_n_tenants {
+                if out.len() >= limit {
+                    break;
+                }
+            }
+            out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
+        }
+        if let Some(limit) = args.pick_n_tenants {
+            assert_eq!(out.len(), limit);
+        }
+        out
+    };
+
+    let mut tenant_timelines = Vec::new();
+    for tenant_id in tenants {
+        let resp = client
+            .get(
+                Uri::try_from(&format!(
+                    "{}/v1/tenant/{}/timeline",
+                    args.mgmt_api_endpoint, tenant_id
+                ))
+                .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        for t in timelines.as_array().unwrap() {
+            let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
+            tenant_timelines.push((tenant_id.clone(), timeline_id));
+        }
+    }
+    println!("tenant_timelines:\n{:?}", tenant_timelines);
+
+    let mut stats = Arc::new(Stats::default());
+
+    tokio::spawn({
+        let stats = Arc::clone(&stats);
+        async move {
+            loop {
+                let start = std::time::Instant::now();
+                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
+                let elapsed = start.elapsed();
+                println!(
+                    "RPS: {:.0}",
+                    completed_requests as f64 / elapsed.as_secs_f64()
+                );
+            }
+        }
+    });
+
+    let mut tasks = Vec::new();
+    for (tenant_id, timeline_id) in tenant_timelines {
+        let stats = Arc::clone(&stats);
+        let t = tokio::spawn(timeline(
+            args,
+            client.clone(),
+            tenant_id,
+            timeline_id,
+            stats,
+        ));
+        tasks.push(t);
+    }
+
+    for t in tasks {
+        t.await.unwrap();
+    }
+}
+
+fn timeline(
+    args: &'static Args,
+    http_client: Client<HttpConnector, hyper::Body>,
+    tenant_id: String,
+    timeline_id: String,
+    stats: Arc<Stats>,
+) -> impl Future<Output = ()> + Send + Sync {
+    async move {
+        let mut resp = http_client
+            .get(
+                Uri::try_from(&format!(
+                    "{}/v1/tenant/{}/timeline/{}/keyspace",
+                    args.mgmt_api_endpoint, tenant_id, timeline_id
+                ))
+                .unwrap(),
+            )
+            .await
+            .unwrap();
+        if !resp.status().is_success() {
+            panic!("Failed to get keyspace: {resp:?}");
+        }
+        let body = hyper::body::to_bytes(resp).await.unwrap();
+        let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        let lsn: Lsn = keyspace["at_lsn"].as_str().unwrap().parse().unwrap();
+
+        let ranges = keyspace["keys"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .filter_map(|r| {
+                let r = r.as_array().unwrap();
+                assert_eq!(r.len(), 2);
+                let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
+                let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
+                // filter out non-relblock keys
+                match (is_rel_block_key(start.0), is_rel_block_key(end.0)) {
+                    (true, true) => Some(KeyRange {
+                        start: start.0.to_i128(),
+                        end: end.0.to_i128(),
+                    }),
+                    (true, false) | (false, true) => {
+                        unimplemented!("split up range")
+                    }
+                    (false, false) => None,
+                }
+            })
+            .collect::<Vec<_>>();
+
+        // weighted ranges
+        let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
+
+        let ranges = Arc::new(ranges);
+        let weights = Arc::new(weights);
+
+        let mut tasks = Vec::<JoinHandle<()>>::new();
+
+        let start = std::time::Instant::now();
+
+        for i in 0..args.num_tasks {
+            let ranges = ranges.clone();
+            let weights = weights.clone();
+            let client = http_client.clone();
+            let tenant_id = tenant_id.clone();
+            let timeline_id = timeline_id.clone();
+            let task = tokio::spawn({
+                let stats = Arc::clone(&stats);
+                async move {
+                    let mut client = getpage_client::Client::new(
+                        args.page_service_connstring.clone(),
+                        tenant_id.clone(),
+                        timeline_id.clone(),
+                    )
+                    .await
+                    .unwrap();
+                    for i in 0..args.num_requests {
+                        match args.mode {
+                            Mode::GetPage => {
+                                let key = {
+                                    let mut rng = rand::thread_rng();
+                                    let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
+                                    let key: i128 = rng.gen_range((r.start..r.end));
+                                    let key = repository::Key::from_i128(key);
+                                    // XXX filter these out when we iterate the keyspace
+                                    assert!(
+                                        is_rel_block_key(key),
+                                        "we filter non-relblock keys out above"
+                                    );
+                                    let (rel_tag, block_no) =
+                                        key_to_rel_block(key).expect("we just checked");
+                                    RelTagBlockNo { rel_tag, block_no }
+                                };
+                                client
+                                    .getpage(key, lsn)
+                                    .await
+                                    .with_context(|| {
+                                        format!(
+                                            "getpage for tenant {} timeline {}",
+                                            tenant_id, timeline_id
+                                        )
+                                    })
+                                    .unwrap();
+                            }
+                            Mode::NoOp => {
+                                client.noop().await.unwrap();
+                            }
+                        }
+                        stats.inc();
+                    }
+                    client.shutdown().await;
+                }
+            });
+            tasks.push(task);
+        }
+
+        for task in tasks {
+            task.await.unwrap();
+        }
+    }
+}
+
+mod getpage_client {
+    use std::pin::Pin;
+
+    use futures::SinkExt;
+    use pageserver_api::models::{
+        PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
+        PagestreamGetPageResponse,
+    };
+    use tokio::task::JoinHandle;
+    use tokio_stream::StreamExt;
+    use tokio_util::sync::CancellationToken;
+    use utils::lsn::Lsn;
+
+    use crate::RelTagBlockNo;
+
+    pub(crate) struct Client {
+        copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
+        cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
+        conn_task: JoinHandle<()>,
+    }
+
+    impl Client {
+        pub fn new(
+            connstring: String,
+            tenant_id: String,
+            timeline_id: String,
+        ) -> impl std::future::Future<Output = anyhow::Result<Self>> + Send {
+            async move {
+                let (client, connection) =
+                    tokio_postgres::connect(&connstring, postgres::NoTls).await?;
+
+                let conn_task_cancel = CancellationToken::new();
+                let conn_task = tokio::spawn({
+                    let conn_task_cancel = conn_task_cancel.clone();
+                    async move {
+                        tokio::select! {
+                            _ = conn_task_cancel.cancelled() => {
+                                return;
+                            }
+                            res = connection => {
+                                res.unwrap();
+                            }
+                        }
+                    }
+                });
+
+                let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = client
+                    .copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
+                    .await?;
+
+                Ok(Self {
+                    copy_both: Box::pin(copy_both),
+                    conn_task,
+                    cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
+                })
+            }
+        }
+
+        pub async fn shutdown(mut self) {
+            let _ = self.cancel_on_client_drop.take();
+            self.conn_task.await.unwrap();
+        }
+
+        pub async fn getpage(
+            &mut self,
+            key: RelTagBlockNo,
+            lsn: Lsn,
+        ) -> anyhow::Result<PagestreamGetPageResponse> {
+            let req = PagestreamGetPageRequest {
+                latest: false,
+                rel: key.rel_tag,
+                blkno: key.block_no,
+                lsn,
+            };
+            let req = PagestreamFeMessage::GetPage(req);
+            match self.do_request(req).await? {
+                PagestreamBeMessage::GetPage(p) => Ok(p),
+                x => anyhow::bail!("Unexpected response: {:?}", x),
+            }
+        }
+
+        pub async fn noop(&mut self) -> anyhow::Result<()> {
+            match self.do_request(PagestreamFeMessage::NoOp).await? {
+                PagestreamBeMessage::NoOp => Ok(()),
+                x => anyhow::bail!("Unexpected response: {:?}", x),
+            }
+        }
+
+        async fn do_request(
+            &mut self,
+            req: PagestreamFeMessage,
+        ) -> Result<PagestreamBeMessage, anyhow::Error> {
+            let req: bytes::Bytes = req.serialize();
+            // let mut req = tokio_util::io::ReaderStream::new(&req);
+            let mut req = tokio_stream::once(Ok(req));
+
+            self.copy_both.send_all(&mut req).await?;
+
+            let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
+            let next = next.unwrap().unwrap();
+
+            match PagestreamBeMessage::deserialize(next)? {
+                PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
+                x => Ok(x),
+            }
+        }
+    }
+}
--- a/pageserver/src/bin/noop_server.rs
+++ b/pageserver/src/bin/noop_server.rs
@@ -0,0 +1,109 @@
+use anyhow::Context;
+use bytes::Buf;
+use clap::Parser;
+use pageserver_api::models::{PagestreamBeMessage, PagestreamErrorResponse, PagestreamFeMessage};
+use postgres_backend::{AuthType, PostgresBackend, QueryError};
+use pq_proto::{BeMessage, FeMessage};
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_util::sync::CancellationToken;
+
+#[derive(clap::Parser)]
+struct Args {
+    bind: String,
+}
+
+#[tokio::main]
+async fn main() {
+    let args = Args::parse();
+
+    let listener = tokio::net::TcpListener::bind(&args.bind).await.unwrap();
+    loop {
+        let (socket, _) = listener.accept().await.unwrap();
+        tokio::spawn(async move {
+            handle_connection(socket).await.unwrap();
+        });
+    }
+}
+
+async fn handle_connection(socket: tokio::net::TcpStream) -> anyhow::Result<()> {
+    socket
+        .set_nodelay(true)
+        .context("could not set TCP_NODELAY")?;
+
+    let peer_addr = socket.peer_addr().context("get peer address")?;
+    let socket = tokio_io_timeout::TimeoutReader::new(socket);
+    tokio::pin!(socket);
+    let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, AuthType::Trust, None)?;
+    let mut conn_handler = NoOpHandler;
+    let cancel = CancellationToken::new();
+    pgbackend
+        .run(&mut conn_handler, || {
+            let cancel = cancel.clone();
+            async move { cancel.cancelled().await }
+        })
+        .await?;
+    anyhow::Ok(())
+}
+
+struct NoOpHandler;
+
+#[async_trait::async_trait]
+impl<IO> postgres_backend::Handler<IO> for NoOpHandler
+where
+    IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
+{
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend<IO>,
+        _sm: &pq_proto::FeStartupPacket,
+    ) -> Result<(), QueryError> {
+        Ok(())
+    }
+
+    async fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend<IO>,
+        query_string: &str,
+    ) -> Result<(), QueryError> {
+        if !query_string.starts_with("pagestream ") {
+            return Err(QueryError::Other(anyhow::anyhow!("not a pagestream query")));
+        }
+
+        // switch client to COPYBOTH
+        pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
+        pgb.flush().await?;
+
+        loop {
+            let msg = pgb.read_message().await?;
+
+            let copy_data_bytes = match msg {
+                Some(FeMessage::CopyData(bytes)) => bytes,
+                Some(FeMessage::Terminate) => return Ok(()),
+                Some(m) => {
+                    return Err(QueryError::Other(anyhow::anyhow!(
+                        "unexpected message: {m:?} during COPY"
+                    )));
+                }
+                None => return Ok(()), // client disconnected
+            };
+
+            let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
+
+            let response = match neon_fe_msg {
+                PagestreamFeMessage::NoOp => Ok(PagestreamBeMessage::NoOp),
+                x => Err(QueryError::Other(anyhow::anyhow!(
+                    "this server only supports no-op: {x:?}"
+                ))),
+            };
+
+            let response = response.unwrap_or_else(|e| {
+                PagestreamBeMessage::Error(PagestreamErrorResponse {
+                    message: e.to_string(),
+                })
+            });
+
+            pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
+            pgb.flush().await?;
+        }
+    }
+}
--- a/pageserver/src/bin/tokio_tcp_bench.rs
+++ b/pageserver/src/bin/tokio_tcp_bench.rs
@@ -0,0 +1,130 @@
+use std::env::args;
+
+use clap::Parser;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+#[derive(clap::Parser)]
+struct Args {
+    #[clap(subcommand)]
+    mode: Mode,
+}
+
+#[derive(clap::Parser)]
+enum Mode {
+    Client(Client),
+    Server(Server),
+}
+
+#[derive(clap::Parser)]
+struct Client {
+    num_tasks: usize,
+}
+#[derive(clap::Parser)]
+struct Server {}
+
+#[tokio::main]
+async fn main() {
+    let args: &'static _ = Box::leak(Box::new(Args::parse()));
+
+    match &args.mode {
+        Mode::Client(x) => client::client(x).await,
+        Mode::Server(x) => server::server(x).await,
+    }
+}
+
+mod client {
+    use std::sync::{atomic::{Ordering, AtomicU64}, Arc};
+
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    use super::Client;
+
+    #[derive(Debug, Default)]
+    struct Stats {
+        completed_requests: AtomicU64,
+    }
+
+    impl Stats {
+        fn inc(&self) {
+            self.completed_requests.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+    pub(crate) async fn client(args: &'static Client) {
+        let mut stats = Arc::new(Stats::default());
+
+        tokio::spawn({
+            let stats = Arc::clone(&stats);
+            async move {
+                loop {
+                    let start = std::time::Instant::now();
+                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                    let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
+                    let elapsed = start.elapsed();
+                    println!(
+                        "RPS: {:.0} RPS/client: {:.2}",
+                        completed_requests as f64 / elapsed.as_secs_f64(),
+                        completed_requests as f64 / elapsed.as_secs_f64() / args.num_tasks as f64,
+                    );
+                }
+            }
+        });
+
+        let mut tasks = Vec::new();
+        for _  in 0..args.num_tasks {
+            let stats = Arc::clone(&stats);
+            let t = tokio::spawn(client_task(args, stats));
+            tasks.push(t);
+        }
+
+        for t in tasks {
+            t.await.unwrap();
+        }
+    }
+
+    async fn client_task(args: &'static Client, stats: Arc<Stats>) -> anyhow::Result<()> {
+        let mut conn = tokio::net::TcpStream::connect("localhost:65000").await?;
+        conn.set_nodelay(true)?;
+
+        loop {
+            let mut buf = [0u8; 1];
+            conn.write_all(&buf).await?;
+            conn.read_exact(&mut buf).await?;
+            stats.inc();
+        }
+    }
+}
+
+mod server {
+
+    use anyhow::Context;
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    use super::Server;
+
+    pub(crate) async fn server(args: &'static Server) {
+        let listener = tokio::net::TcpListener::bind("localhost:65000").await.unwrap();
+        loop {
+            let (socket, _) = listener.accept().await.unwrap();
+            tokio::spawn(async move {
+                server_handle_connection(args, socket).await.unwrap();
+            });
+        }
+    }
+
+    async fn server_handle_connection(
+        args: &'static Server,
+        socket: tokio::net::TcpStream,
+    ) -> anyhow::Result<()> {
+        socket
+            .set_nodelay(true)
+            .context("could not set TCP_NODELAY")?;
+        // let socket = tokio_io_timeout::TimeoutReader::new(socket);
+        tokio::pin!(socket);
+
+        loop {
+            let mut buf = [0u8; 4096];
+            socket.read_exact(&mut buf).await?;
+            socket.write_all(&buf).await?;
+        }
+    }
+}
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -1314,6 +1314,12 @@ broker_endpoint = '{broker_endpoint}'
            assert_eq!(
                parsed_remote_storage_config,
                RemoteStorageConfig {
+                    max_concurrent_syncs: NonZeroUsize::new(
+                        remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS
+                    )
+                        .unwrap(),
+                    max_sync_errors: NonZeroU32::new(remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
+                        .unwrap(),
                    storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
                },
                "Remote storage config should correctly parse the local FS config and fill other storage defaults"
@@ -1374,6 +1380,8 @@ broker_endpoint = '{broker_endpoint}'
            assert_eq!(
                parsed_remote_storage_config,
                RemoteStorageConfig {
+                    max_concurrent_syncs,
+                    max_sync_errors,
                    storage: RemoteStorageKind::AwsS3(S3Config {
                        bucket_name: bucket_name.clone(),
                        bucket_region: bucket_region.clone(),
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -345,7 +345,7 @@ impl DeletionList {
                result.extend(
                    timeline_layers
                        .into_iter()
-                        .map(|l| timeline_remote_path.join(&Utf8PathBuf::from(l))),
+                        .map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),
                );
            }
        }
@@ -893,6 +893,14 @@ mod test {
        std::fs::create_dir_all(remote_fs_dir)?;
        let remote_fs_dir = harness.conf.workdir.join("remote_fs").canonicalize_utf8()?;
        let storage_config = RemoteStorageConfig {
+            max_concurrent_syncs: std::num::NonZeroUsize::new(
+                remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
+            )
+            .unwrap(),
+            max_sync_errors: std::num::NonZeroU32::new(
+                remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
+            )
+            .unwrap(),
            storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
        };
        let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -352,8 +352,7 @@ paths:
          in: query
          required: true
          schema:
-            type: string
-            format: hex
+            type: integer
          description: A LSN to get the timestamp
      responses:
        "200":
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -303,7 +303,11 @@ async fn build_timeline_info(
        // we're executing this function, we will outlive the timeline on-disk state.
        info.current_logical_size_non_incremental = Some(
            timeline
-                .get_current_logical_size_non_incremental(info.last_record_lsn, ctx)
+                .get_current_logical_size_non_incremental(
+                    info.last_record_lsn,
+                    CancellationToken::new(),
+                    ctx,
+                )
                .await?,
        );
    }
@@ -743,6 +747,46 @@ async fn tenant_ignore_handler(
    json_response(StatusCode::OK, ())
 }

+async fn tenant_duplicate_handler(
+    mut request: Request<Body>,
+    cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let src_tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+
+    let request_data: TenantCreateRequest = json_request(&mut request).await?;
+    let new_tenant_id = request_data.new_tenant_id;
+    check_permission(&request, None)?;
+
+    let _timer = STORAGE_TIME_GLOBAL
+        .get_metric_with_label_values(&[StorageTimeOperation::DuplicateTenant.into()])
+        .expect("bug")
+        .start_timer();
+
+    let tenant_conf =
+        TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
+
+    let state = get_state(&request);
+
+    let generation = get_request_generation(state, request_data.generation)?;
+
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
+
+    mgr::duplicate_tenant(
+        state.conf,
+        tenant_conf,
+        src_tenant_id,
+        new_tenant_id,
+        generation,
+        state.tenant_resources(),
+        &ctx,
+        &cancel,
+    )
+    .instrument(info_span!("tenant_duplicate", %src_tenant_id, tenant_id = %new_tenant_id))
+    .await?;
+
+    json_response(StatusCode::CREATED, TenantCreateResponse(new_tenant_id))
+}
+
 async fn tenant_list_handler(
    request: Request<Body>,
    _cancel: CancellationToken,
@@ -1667,8 +1711,6 @@ where
        );

        match handle.await {
-            // TODO: never actually return Err from here, always Ok(...) so that we can log
-            // spanned errors. Call api_error_handler instead and return appropriate Body.
            Ok(result) => result,
            Err(e) => {
                // The handler task panicked. We have a global panic handler that logs the
@@ -1785,6 +1827,9 @@ pub fn make_router(
        .post("/v1/tenant/:tenant_id/ignore", |r| {
            api_handler(r, tenant_ignore_handler)
        })
+        .post("/v1/tenant/:tenant_id/duplicate", |r| {
+            api_handler(r, tenant_duplicate_handler)
+        })
        .get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
            api_handler(r, timeline_detail_handler)
        })
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -51,6 +51,9 @@ pub enum StorageTimeOperation {

    #[strum(serialize = "create tenant")]
    CreateTenant,
+
+    #[strum(serialize = "duplicate tenant")]
+    DuplicateTenant,
 }

 pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
@@ -757,6 +760,7 @@ pub enum SmgrQueryType {
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
+    NoOp,
 }

 #[derive(Debug)]
@@ -1225,6 +1229,15 @@ pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub(crate) static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "pageserver_wal_redo_wait_seconds",
+        "Time spent waiting for access to the Postgres WAL redo process",
+        redo_histogram_time_buckets!(),
+    )
+    .expect("failed to define a metric")
+});
+
 pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wal_redo_records_histogram",
@@ -1919,6 +1932,7 @@ pub fn preinitialize_metrics() {
        &READ_NUM_FS_LAYERS,
        &WAIT_LSN_TIME,
        &WAL_REDO_TIME,
+        &WAL_REDO_WAIT_TIME,
        &WAL_REDO_RECORDS_HISTOGRAM,
        &WAL_REDO_BYTES_HISTOGRAM,
    ]
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -218,27 +218,9 @@ async fn page_service_conn_main(
    // no write timeout is used, because the kernel is assumed to error writes after some time.
    let mut socket = tokio_io_timeout::TimeoutReader::new(socket);

-    let default_timeout_ms = 10 * 60 * 1000; // 10 minutes by default
-    let socket_timeout_ms = (|| {
-        fail::fail_point!("simulated-bad-compute-connection", |avg_timeout_ms| {
-            // Exponential distribution for simulating
-            // poor network conditions, expect about avg_timeout_ms to be around 15
-            // in tests
-            if let Some(avg_timeout_ms) = avg_timeout_ms {
-                let avg = avg_timeout_ms.parse::<i64>().unwrap() as f32;
-                let u = rand::random::<f32>();
-                ((1.0 - u).ln() / (-avg)) as u64
-            } else {
-                default_timeout_ms
-            }
-        });
-        default_timeout_ms
-    })();
-
-    // A timeout here does not mean the client died, it can happen if it's just idle for
-    // a while: we will tear down this PageServerHandler and instantiate a new one if/when
-    // they reconnect.
-    socket.set_timeout(Some(std::time::Duration::from_millis(socket_timeout_ms)));
+    // timeout should be lower, but trying out multiple days for
+    // <https://github.com/neondatabase/neon/issues/4205>
+    socket.set_timeout(Some(std::time::Duration::from_secs(60 * 60 * 24 * 3)));
    let socket = std::pin::pin!(socket);

    // XXX: pgbackend.run() should take the connection_ctx,
@@ -509,14 +491,15 @@ impl PageServerHandler {
                        span,
                    )
                }
+                PagestreamFeMessage::NoOp => {
+                    let _timer = metrics.start_timer(metrics::SmgrQueryType::NoOp);
+                    let span = tracing::info_span!("no_op");
+                    (Ok(PagestreamBeMessage::NoOp), span)
+                }
            };

            if let Err(e) = &response {
-                // Requests may fail as soon as we are Stopping, even if the Timeline's cancellation token wasn't fired yet,
-                // because wait_lsn etc will drop out
-                // is_stopping(): [`Timeline::flush_and_shutdown`] has entered
-                // is_canceled(): [`Timeline::shutdown`]` has entered
-                if timeline.cancel.is_cancelled() || timeline.is_stopping() {
+                if timeline.cancel.is_cancelled() {
                    // If we fail to fulfil a request during shutdown, which may be _because_ of
                    // shutdown, then do not send the error to the client.  Instead just drop the
                    // connection.
@@ -920,7 +903,7 @@ impl PageServerHandler {

    // when accessing management api supply None as an argument
    // when using to authorize tenant pass corresponding tenant id
-    fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<(), QueryError> {
+    fn check_permission(&self, tenant_id: Option<TenantId>) -> anyhow::Result<()> {
        if self.auth.is_none() {
            // auth is set to Trust, nothing to check so just return ok
            return Ok(());
@@ -932,7 +915,7 @@ impl PageServerHandler {
            .claims
            .as_ref()
            .expect("claims presence already checked");
-        check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0))
+        check_permission(claims, tenant_id)
    }

    /// Shorthand for getting a reference to a Timeline of an Active tenant.
@@ -971,17 +954,16 @@ where
            .auth
            .as_ref()
            .unwrap()
-            .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)
-            .map_err(|e| QueryError::Unauthorized(e.0))?;
+            .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?;

        if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {
-            return Err(QueryError::Unauthorized(
-                "jwt token scope is Tenant, but tenant id is missing".into(),
-            ));
+            return Err(QueryError::Other(anyhow::anyhow!(
+                "jwt token scope is Tenant, but tenant id is missing"
+            )));
        }

-        debug!(
-            "jwt scope check succeeded for scope: {:#?} by tenant id: {:?}",
+        info!(
+            "jwt auth succeeded for scope: {:#?} by tenant id: {:?}",
            data.claims.scope, data.claims.tenant_id,
        );

@@ -1003,13 +985,9 @@ where
        pgb: &mut PostgresBackend<IO>,
        query_string: &str,
    ) -> Result<(), QueryError> {
-        fail::fail_point!("simulated-bad-compute-connection", |_| {
-            info!("Hit failpoint for bad connection");
-            Err(QueryError::SimulatedConnectionError)
-        });
-
        let ctx = self.connection_ctx.attached_child();
        debug!("process query {query_string:?}");
+
        if query_string.starts_with("pagestream ") {
            let (_, params_raw) = query_string.split_at("pagestream ".len());
            let params = params_raw.split(' ').collect::<Vec<_>>();
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize};
 use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::ops::Range;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
 use utils::{bin_ser::BeSer, lsn::Lsn};

@@ -577,6 +578,7 @@ impl Timeline {
    pub async fn get_current_logical_size_non_incremental(
        &self,
        lsn: Lsn,
+        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> Result<u64, CalculateLogicalSizeError> {
        crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
@@ -588,7 +590,7 @@ impl Timeline {
        let mut total_size: u64 = 0;
        for (spcnode, dbnode) in dbdir.dbdirs.keys() {
            for rel in self.list_rels(*spcnode, *dbnode, lsn, ctx).await? {
-                if self.cancel.is_cancelled() {
+                if cancel.is_cancelled() {
                    return Err(CalculateLogicalSizeError::Cancelled);
                }
                let relsize_key = rel_size_to_key(rel);
@@ -1698,6 +1700,7 @@ const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.

+/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
 pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    Ok(match key.field1 {
        0x00 => (
@@ -1713,7 +1716,8 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    })
 }

-fn is_rel_block_key(key: Key) -> bool {
+/// See [[key_to_rel_block]].
+pub fn is_rel_block_key(key: Key) -> bool {
    key.field1 == 0x00 && key.field4 != 0
 }

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1841,13 +1841,7 @@ impl Tenant {
            timelines.values().for_each(|timeline| {
                let timeline = Arc::clone(timeline);
                let span = Span::current();
-                js.spawn(async move {
-                    if freeze_and_flush {
-                        timeline.flush_and_shutdown().instrument(span).await
-                    } else {
-                        timeline.shutdown().instrument(span).await
-                    }
-                });
+                js.spawn(async move { timeline.shutdown(freeze_and_flush).instrument(span).await });
            })
        };
        tracing::info!("Waiting for timelines...");
@@ -3534,6 +3528,10 @@ pub(crate) mod harness {
            let remote_fs_dir = conf.workdir.join("localfs");
            std::fs::create_dir_all(&remote_fs_dir).unwrap();
            let config = RemoteStorageConfig {
+                // TODO: why not remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
+                max_concurrent_syncs: std::num::NonZeroUsize::new(2_000_000).unwrap(),
+                // TODO: why not remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
+                max_sync_errors: std::num::NonZeroU32::new(3_000_000).unwrap(),
                storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
            };
            let remote_storage = GenericRemoteStorage::from_config(&config).unwrap();
@@ -4733,7 +4731,7 @@ mod tests {
            // Keeps uninit mark in place
            let raw_tline = tline.raw_timeline().unwrap();
            raw_tline
-                .shutdown()
+                .shutdown(false)
                .instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_id))
                .await;
            std::mem::forget(tline);
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -327,7 +327,7 @@ mod tests {
                let mut sz: u16 = rng.gen();
                // Make 50% of the arrays small
                if rng.gen() {
-                    sz &= 63;
+                    sz |= 63;
                }
                random_array(sz.into())
            })
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -6,9 +6,11 @@ use rand::{distributions::Alphanumeric, Rng};
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::ops::Deref;
+use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 use tokio::fs;
+use tokio::io::AsyncSeekExt;
 use utils::timeout::{timeout_cancellable, TimeoutCancellableError};

 use anyhow::Context;
@@ -30,7 +32,11 @@ use crate::metrics::TENANT_MANAGER as METRICS;
 use crate::task_mgr::{self, TaskKind};
 use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt};
 use crate::tenant::delete::DeleteTenantFlow;
-use crate::tenant::{create_tenant_files, AttachedTenantConf, SpawnMode, Tenant, TenantState};
+use crate::tenant::span::debug_assert_current_span_has_tenant_id;
+use crate::tenant::storage_layer::{DeltaLayer, ImageLayer, LayerFileName};
+use crate::tenant::{
+    create_tenant_files, remote_timeline_client, AttachedTenantConf, IndexPart, Tenant, TenantState,
+};
 use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};

 use utils::crashsafe::path_with_suffix_extension;
@@ -40,7 +46,7 @@ use utils::id::{TenantId, TimelineId};

 use super::delete::DeleteTenantError;
 use super::timeline::delete::DeleteTimelineFlow;
-use super::TenantSharedResources;
+use super::{SpawnMode, TenantSharedResources};

 /// For a tenant that appears in TenantsMap, it may either be
 /// - `Attached`: has a full Tenant object, is elegible to service
@@ -566,10 +572,8 @@ pub(crate) async fn shutdown_all_tenants() {
 async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
    use utils::completion;

-    let mut join_set = JoinSet::new();
-
-    // Atomically, 1. create the shutdown tasks and 2. prevent creation of new tenants.
-    let (total_in_progress, total_attached) = {
+    // Atomically, 1. extract the list of tenants to shut down and 2. prevent creation of new tenants.
+    let (in_progress_ops, tenants_to_shut_down) = {
        let mut m = tenants.write().unwrap();
        match &mut *m {
            TenantsMap::Initializing => {
@@ -579,67 +583,78 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
            }
            TenantsMap::Open(tenants) => {
                let mut shutdown_state = HashMap::new();
-                let mut total_in_progress = 0;
-                let mut total_attached = 0;
+                let mut in_progress_ops = Vec::new();
+                let mut tenants_to_shut_down = Vec::new();

-                for (tenant_id, v) in tenants.drain() {
+                for (k, v) in tenants.drain() {
                    match v {
                        TenantSlot::Attached(t) => {
-                            shutdown_state.insert(tenant_id, TenantSlot::Attached(t.clone()));
-                            join_set.spawn(
-                                async move {
-                                    let freeze_and_flush = true;
-
-                                    let res = {
-                                        let (_guard, shutdown_progress) = completion::channel();
-                                        t.shutdown(shutdown_progress, freeze_and_flush).await
-                                    };
-
-                                    if let Err(other_progress) = res {
-                                        // join the another shutdown in progress
-                                        other_progress.wait().await;
-                                    }
-
-                                    // we cannot afford per tenant logging here, because if s3 is degraded, we are
-                                    // going to log too many lines
-                                    debug!("tenant successfully stopped");
-                                }
-                                .instrument(info_span!("shutdown", %tenant_id)),
-                            );
-
-                            total_attached += 1;
+                            tenants_to_shut_down.push(t.clone());
+                            shutdown_state.insert(k, TenantSlot::Attached(t));
                        }
                        TenantSlot::Secondary => {
-                            shutdown_state.insert(tenant_id, TenantSlot::Secondary);
+                            shutdown_state.insert(k, TenantSlot::Secondary);
                        }
                        TenantSlot::InProgress(notify) => {
                            // InProgress tenants are not visible in TenantsMap::ShuttingDown: we will
                            // wait for their notifications to fire in this function.
-                            join_set.spawn(async move {
-                                notify.wait().await;
-                            });
-
-                            total_in_progress += 1;
+                            in_progress_ops.push(notify);
                        }
                    }
                }
                *m = TenantsMap::ShuttingDown(shutdown_state);
-                (total_in_progress, total_attached)
+                (in_progress_ops, tenants_to_shut_down)
            }
            TenantsMap::ShuttingDown(_) => {
+                // TODO: it is possible that detach and shutdown happen at the same time. as a
+                // result, during shutdown we do not wait for detach.
                error!("already shutting down, this function isn't supposed to be called more than once");
                return;
            }
        }
    };

-    let started_at = std::time::Instant::now();
-
    info!(
        "Waiting for {} InProgress tenants and {} Attached tenants to shut down",
-        total_in_progress, total_attached
+        in_progress_ops.len(),
+        tenants_to_shut_down.len()
    );

+    for barrier in in_progress_ops {
+        barrier.wait().await;
+    }
+
+    info!(
+        "InProgress tenants shut down, waiting for {} Attached tenants to shut down",
+        tenants_to_shut_down.len()
+    );
+    let started_at = std::time::Instant::now();
+    let mut join_set = JoinSet::new();
+    for tenant in tenants_to_shut_down {
+        let tenant_id = tenant.get_tenant_id();
+        join_set.spawn(
+            async move {
+                let freeze_and_flush = true;
+
+                let res = {
+                    let (_guard, shutdown_progress) = completion::channel();
+                    tenant.shutdown(shutdown_progress, freeze_and_flush).await
+                };
+
+                if let Err(other_progress) = res {
+                    // join the another shutdown in progress
+                    other_progress.wait().await;
+                }
+
+                // we cannot afford per tenant logging here, because if s3 is degraded, we are
+                // going to log too many lines
+
+                debug!("tenant successfully stopped");
+            }
+            .instrument(info_span!("shutdown", %tenant_id)),
+        );
+    }
+
    let total = join_set.len();
    let mut panicked = 0;
    let mut buffering = true;
@@ -652,7 +667,7 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
                match joined {
                    Ok(()) => {}
                    Err(join_error) if join_error.is_cancelled() => {
-                        unreachable!("we are not cancelling any of the tasks");
+                        unreachable!("we are not cancelling any of the futures");
                    }
                    Err(join_error) if join_error.is_panic() => {
                        // cannot really do anything, as this panic is likely a bug
@@ -726,6 +741,171 @@ pub(crate) async fn create_tenant(
    Ok(created_tenant)
 }

+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn duplicate_tenant(
+    conf: &'static PageServerConf,
+    tenant_conf: TenantConfOpt,
+    src_tenant_id: TenantId,
+    new_tenant_id: TenantId,
+    generation: Generation,
+    resources: TenantSharedResources,
+    ctx: &RequestContext,
+    cancel: &CancellationToken,
+) -> Result<(), TenantMapInsertError> {
+    debug_assert_current_span_has_tenant_id();
+
+    // TODO: would be nice to use tenant_map_insert here, but, we're not ready to create a Tenant object yet
+    let tempdir = path_with_suffix_extension(
+        conf.tenants_path().join(&new_tenant_id.to_string()),
+        &format!("duplication.{TEMP_FILE_SUFFIX}"),
+    );
+    tokio::fs::remove_dir_all(&tempdir)
+        .await
+        .or_else(|e| match e.kind() {
+            std::io::ErrorKind::NotFound => Ok(()),
+            _ => Err(e),
+        })
+        .context("pre-run clean up tempdir")?;
+
+    tokio::fs::create_dir(&tempdir)
+        .await
+        .context("create tempdir")?;
+
+    // Copy the tenant's data in S3
+    let remote_storage = resources
+        .remote_storage
+        .as_ref()
+        .context("only works with remote storage")?;
+
+    let (remote_src_timelines, other_prefixes) = remote_timeline_client::list_remote_timelines(
+        remote_storage,
+        src_tenant_id,
+        cancel.clone(),
+    )
+    .await
+    .context("list src timelines")?;
+
+    if !other_prefixes.is_empty() {
+        return Err(TenantMapInsertError::Other(anyhow::anyhow!(
+            "unimplemented: handling of other prefixes in src tenant: {:?}",
+            other_prefixes
+        )));
+    }
+
+    info!(?remote_src_timelines, "got src timelines");
+
+    for timeline_id in remote_src_timelines {
+        async {
+            let tempdir = tempdir.join(&timeline_id.to_string());
+
+            tokio::fs::create_dir(&tempdir)
+                .await
+                .context("create tempdir for timeline")?;
+
+            let remote_src_tl =
+                remote_timeline_client::remote_timeline_path(&src_tenant_id, &timeline_id);
+            let remote_dst_tl =
+                remote_timeline_client::remote_timeline_path(&new_tenant_id, &timeline_id);
+
+            let object_names = remote_storage
+                .list_prefixes(Some(&remote_src_tl))
+                .await
+                .context("list timeline remote prefix")?;
+
+            for name in object_names {
+                async {
+                    let name = name.object_name().context(
+                        "list_prefixes return values should always have object_name()=Some",
+                    )?;
+                    let remote_src_obj = remote_src_tl.join(name);
+                    let remote_dst_obj = remote_dst_tl.join(name);
+
+                    let tmp_obj_filepath = tempdir.join(name);
+                    let mut tmp_obj_file = tokio::fs::OpenOptions::new()
+                        .read(true)
+                        .write(true)
+                        .create_new(true)
+                        .open(&tmp_obj_filepath)
+                        .await
+                        .context("create temp file")?;
+                    let mut tmp_dl = remote_storage
+                        .download(&remote_src_obj)
+                        .await
+                        .context("start download")?;
+                    let tmp_obj_size =
+                        tokio::io::copy(&mut tmp_dl.download_stream, &mut tmp_obj_file)
+                            .await
+                            .context("do the download")?;
+
+                    if name == IndexPart::FILE_NAME {
+                        // needs no patching
+                    } else {
+                        let name = LayerFileName::from_str(name).map_err(|e: String| {
+                            anyhow::anyhow!("unknown key in timeline s3 prefix: {name:?}: {e}")
+                        })?;
+                        match name {
+                            LayerFileName::Image(_) => {
+                                ImageLayer::rewrite_tenant_timeline(
+                                    &tmp_obj_filepath,
+                                    new_tenant_id,
+                                    timeline_id, /* leave as is */
+                                    ctx,
+                                )
+                                .await
+                                .context("rewrite tenant timeline")?;
+                            }
+                            LayerFileName::Delta(_) => {
+                                DeltaLayer::rewrite_tenant_timeline(
+                                    &tmp_obj_filepath,
+                                    new_tenant_id,
+                                    timeline_id, /* leave as is */
+                                    ctx,
+                                )
+                                .await
+                                .context("rewrite tenant timeline")?;
+                            }
+                        }
+                    }
+
+                    info!(?remote_dst_obj, "uploading");
+
+                    tmp_obj_file
+                        .seek(std::io::SeekFrom::Start(0))
+                        .await
+                        .context("seek tmp file to beginning for upload")?;
+                    remote_storage
+                        .upload(
+                            tmp_obj_file,
+                            tmp_obj_size as usize,
+                            &remote_dst_obj,
+                            tmp_dl.metadata,
+                        )
+                        .await
+                        .context("upload modified")?;
+
+                    tokio::fs::remove_file(tmp_obj_filepath)
+                        .await
+                        .context("remove temp file")?;
+
+                    anyhow::Ok(())
+                }
+                .instrument(info_span!("copy object", object_name=?name))
+                .await
+                .context("copy object")?;
+            }
+            anyhow::Ok(())
+        }
+        .instrument(info_span!("copy_timeline", timeline_id=%timeline_id))
+        .await?;
+    }
+
+    tokio::fs::remove_dir_all(&tempdir)
+        .await
+        .context("post-run clean up tempdir")?;
+
+    attach_tenant(conf, new_tenant_id, generation, tenant_conf, resources, ctx).await
+}
+
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum SetNewTenantConfigError {
    #[error(transparent)]
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -6,6 +6,7 @@ use std::sync::Arc;
 use anyhow::{bail, Context};
 use tokio::sync::oneshot::error::RecvError;
 use tokio::sync::Semaphore;
+use tokio_util::sync::CancellationToken;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
@@ -349,6 +350,10 @@ async fn fill_logical_sizes(
    // our advantage with `?` error handling.
    let mut joinset = tokio::task::JoinSet::new();

+    let cancel = tokio_util::sync::CancellationToken::new();
+    // be sure to cancel all spawned tasks if we are dropped
+    let _dg = cancel.clone().drop_guard();
+
    // For each point that would benefit from having a logical size available,
    // spawn a Task to fetch it, unless we have it cached already.
    for seg in segments.iter() {
@@ -366,8 +371,15 @@ async fn fill_logical_sizes(
                let parallel_size_calcs = Arc::clone(limit);
                let ctx = ctx.attached_child();
                joinset.spawn(
-                    calculate_logical_size(parallel_size_calcs, timeline, lsn, cause, ctx)
-                        .in_current_span(),
+                    calculate_logical_size(
+                        parallel_size_calcs,
+                        timeline,
+                        lsn,
+                        cause,
+                        ctx,
+                        cancel.child_token(),
+                    )
+                    .in_current_span(),
                );
            }
            e.insert(cached_size);
@@ -475,13 +487,14 @@ async fn calculate_logical_size(
    lsn: utils::lsn::Lsn,
    cause: LogicalSizeCalculationCause,
    ctx: RequestContext,
+    cancel: CancellationToken,
 ) -> Result<TimelineAtLsnSizeResult, RecvError> {
    let _permit = tokio::sync::Semaphore::acquire_owned(limit)
        .await
        .expect("global semaphore should not had been closed");

    let size_res = timeline
-        .spawn_ondemand_logical_size_calculation(lsn, cause, ctx)
+        .spawn_ondemand_logical_size_calculation(lsn, cause, ctx, cancel)
        .instrument(info_span!("spawn_ondemand_logical_size_calculation"))
        .await?;
    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -609,6 +609,49 @@ impl Drop for DeltaLayerWriter {
    }
 }

+impl DeltaLayer {
+    /// Assume the file at `path` is corrupt if this function returns with an error.
+    pub(crate) async fn rewrite_tenant_timeline(
+        path: &Utf8Path,
+        new_tenant: TenantId,
+        new_timeline: TimelineId,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<()> {
+        let file = VirtualFile::open_with_options(
+            path,
+            &*std::fs::OpenOptions::new().read(true).write(true),
+        )
+        .await
+        .with_context(|| format!("Failed to open file '{}'", path))?;
+        let file = FileBlockReader::new(file);
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
+        let mut file = file.file;
+        if actual_summary.magic != DELTA_FILE_MAGIC {
+            bail!("File '{}' is not a delta layer", path);
+        }
+        let new_summary = Summary {
+            tenant_id: new_tenant,
+            timeline_id: new_timeline,
+            ..actual_summary
+        };
+
+        let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
+        Summary::ser_into(&new_summary, &mut buf)?;
+        if buf.spilled() {
+            // The code in ImageLayerWriterInner just warn!()s for this.
+            // It should probably error out as well.
+            anyhow::bail!(
+                "Used more than one page size for summary buffer: {}",
+                buf.len()
+            );
+        }
+        file.seek(SeekFrom::Start(0)).await?;
+        file.write_all(&buf).await?;
+        Ok(())
+    }
+}
+
 impl DeltaLayerInner {
    pub(super) async fn load(
        path: &Utf8Path,
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -294,6 +294,49 @@ impl ImageLayer {
    }
 }

+impl ImageLayer {
+    /// Assume the file at `path` is corrupt if this function returns with an error.
+    pub(crate) async fn rewrite_tenant_timeline(
+        path: &Utf8Path,
+        new_tenant: TenantId,
+        new_timeline: TimelineId,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<()> {
+        let file = VirtualFile::open_with_options(
+            path,
+            &*std::fs::OpenOptions::new().read(true).write(true),
+        )
+        .await
+        .with_context(|| format!("Failed to open file '{}'", path))?;
+        let file = FileBlockReader::new(file);
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
+        let mut file = file.file;
+        if actual_summary.magic != IMAGE_FILE_MAGIC {
+            bail!("File '{}' is not a delta layer", path);
+        }
+        let new_summary = Summary {
+            tenant_id: new_tenant,
+            timeline_id: new_timeline,
+            ..actual_summary
+        };
+
+        let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
+        Summary::ser_into(&new_summary, &mut buf)?;
+        if buf.spilled() {
+            // The code in ImageLayerWriterInner just warn!()s for this.
+            // It should probably error out as well.
+            anyhow::bail!(
+                "Used more than one page size for summary buffer: {}",
+                buf.len()
+            );
+        }
+        file.seek(SeekFrom::Start(0)).await?;
+        file.write_all(&buf).await?;
+        Ok(())
+    }
+}
+
 impl ImageLayerInner {
    pub(super) async fn load(
        path: &Utf8Path,
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -251,7 +251,6 @@ impl Layer {

        layer
            .get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
-            .instrument(tracing::info_span!("get_value_reconstruct_data", layer=%self))
            .await
    }

@@ -1212,10 +1211,8 @@ impl DownloadedLayer {
            // this will be a permanent failure
            .context("load layer");

-            if let Err(e) = res.as_ref() {
+            if res.is_err() {
                LAYER_IMPL_METRICS.inc_permanent_loading_failures();
-                // TODO(#5815): we are not logging all errors, so temporarily log them here as well
-                tracing::error!("layer loading failed permanently: {e:#}");
            }
            res
        };
@@ -1294,7 +1291,6 @@ impl ResidentLayer {
    }

    /// Loads all keys stored in the layer. Returns key, lsn and value size.
-    #[tracing::instrument(skip_all, fields(layer=%self))]
    pub(crate) async fn load_keys<'a>(
        &'a self,
        ctx: &RequestContext,
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -36,6 +36,7 @@ use std::time::{Duration, Instant, SystemTime};
 use crate::context::{
    AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder,
 };
+use crate::deletion_queue::DeletionQueueClient;
 use crate::tenant::storage_layer::delta_layer::DeltaEntry;
 use crate::tenant::storage_layer::{
    AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
@@ -49,7 +50,6 @@ use crate::tenant::{
    metadata::{save_metadata, TimelineMetadata},
    par_fsync,
 };
-use crate::{deletion_queue::DeletionQueueClient, tenant::remote_timeline_client::StopError};

 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum};
@@ -247,7 +247,7 @@ pub struct Timeline {
    /// the flush finishes. You can use that to wait for the flush to finish.
    layer_flush_start_tx: tokio::sync::watch::Sender<u64>,
    /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel
-    layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>,
+    layer_flush_done_tx: tokio::sync::watch::Sender<(u64, anyhow::Result<()>)>,

    /// Layer removal lock.
    /// A lock to ensure that no layer of the timeline is removed concurrently by other tasks.
@@ -374,19 +374,6 @@ pub enum PageReconstructError {
    WalRedo(anyhow::Error),
 }

-#[derive(thiserror::Error, Debug)]
-enum FlushLayerError {
-    /// Timeline cancellation token was cancelled
-    #[error("timeline shutting down")]
-    Cancelled,
-
-    #[error(transparent)]
-    PageReconstructError(#[from] PageReconstructError),
-
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
-
 impl std::fmt::Debug for PageReconstructError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        match self {
@@ -904,16 +891,15 @@ impl Timeline {
        self.launch_eviction_task(background_jobs_can_start);
    }

-    /// Graceful shutdown, may do a lot of I/O as we flush any open layers to disk and then
-    /// also to remote storage.  This method can easily take multiple seconds for a busy timeline.
-    ///
-    /// While we are flushing, we continue to accept read I/O.
    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
-    pub(crate) async fn flush_and_shutdown(&self) {
+    pub async fn shutdown(self: &Arc<Self>, freeze_and_flush: bool) {
        debug_assert_current_span_has_tenant_and_timeline_id();

-        // Stop ingesting data, so that we are not still writing to an InMemoryLayer while
-        // trying to flush
+        // Signal any subscribers to our cancellation token to drop out
+        tracing::debug!("Cancelling CancellationToken");
+        self.cancel.cancel();
+
+        // prevent writes to the InMemoryLayer
        tracing::debug!("Waiting for WalReceiverManager...");
        task_mgr::shutdown_tasks(
            Some(TaskKind::WalReceiverManager),
@@ -922,70 +908,40 @@ impl Timeline {
        )
        .await;

-        // Since we have shut down WAL ingest, we should not let anyone start waiting for the LSN to advance
-        self.last_record_lsn.shutdown();
-
        // now all writers to InMemory layer are gone, do the final flush if requested
-        match self.freeze_and_flush().await {
-            Ok(_) => {
-                // drain the upload queue
-                if let Some(client) = self.remote_client.as_ref() {
-                    // if we did not wait for completion here, it might be our shutdown process
-                    // didn't wait for remote uploads to complete at all, as new tasks can forever
-                    // be spawned.
-                    //
-                    // what is problematic is the shutting down of RemoteTimelineClient, because
-                    // obviously it does not make sense to stop while we wait for it, but what
-                    // about corner cases like s3 suddenly hanging up?
-                    if let Err(e) = client.wait_completion().await {
-                        // Non-fatal.  Shutdown is infallible.  Failures to flush just mean that
-                        // we have some extra WAL replay to do next time the timeline starts.
-                        warn!("failed to flush to remote storage: {e:#}");
-                    }
+        if freeze_and_flush {
+            match self.freeze_and_flush().await {
+                Ok(()) => {}
+                Err(e) => {
+                    warn!("failed to freeze and flush: {e:#}");
+                    return; // TODO: should probably drain remote timeline client anyways?
                }
            }
-            Err(e) => {
-                // Non-fatal.  Shutdown is infallible.  Failures to flush just mean that
-                // we have some extra WAL replay to do next time the timeline starts.
-                warn!("failed to freeze and flush: {e:#}");
+
+            // drain the upload queue
+            let res = if let Some(client) = self.remote_client.as_ref() {
+                // if we did not wait for completion here, it might be our shutdown process
+                // didn't wait for remote uploads to complete at all, as new tasks can forever
+                // be spawned.
+                //
+                // what is problematic is the shutting down of RemoteTimelineClient, because
+                // obviously it does not make sense to stop while we wait for it, but what
+                // about corner cases like s3 suddenly hanging up?
+                client.wait_completion().await
+            } else {
+                Ok(())
+            };
+
+            if let Err(e) = res {
+                warn!("failed to await for frozen and flushed uploads: {e:#}");
            }
        }

-        self.shutdown().await;
-    }
-
-    /// Shut down immediately, without waiting for any open layers to flush to disk.  This is a subset of
-    /// the graceful [`Timeline::flush_and_shutdown`] function.
-    pub(crate) async fn shutdown(&self) {
-        // Signal any subscribers to our cancellation token to drop out
-        tracing::debug!("Cancelling CancellationToken");
-        self.cancel.cancel();
-
        // Page request handlers might be waiting for LSN to advance: they do not respect Timeline::cancel
        // while doing so.
        self.last_record_lsn.shutdown();

-        // Shut down the layer flush task before the remote client, as one depends on the other
-        task_mgr::shutdown_tasks(
-            Some(TaskKind::LayerFlushTask),
-            Some(self.tenant_id),
-            Some(self.timeline_id),
-        )
-        .await;
-
-        // Shut down remote timeline client: this gracefully moves its metadata into its Stopping state in
-        // case our caller wants to use that for a deletion
-        if let Some(remote_client) = self.remote_client.as_ref() {
-            match remote_client.stop() {
-                Ok(()) => {}
-                Err(StopError::QueueUninitialized) => {
-                    // Shutting down during initialization is legal
-                }
-            }
-        }
-
        tracing::debug!("Waiting for tasks...");
-
        task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(self.timeline_id)).await;

        // Finally wait until any gate-holders are complete
@@ -1029,12 +985,7 @@ impl Timeline {
            reason,
            backtrace: backtrace_str,
        };
-        self.set_state(broken_state);
-
-        // Although the Broken state is not equivalent to shutdown() (shutdown will be called
-        // later when this tenant is detach or the process shuts down), firing the cancellation token
-        // here avoids the need for other tasks to watch for the Broken state explicitly.
-        self.cancel.cancel();
+        self.set_state(broken_state)
    }

    pub fn current_state(&self) -> TimelineState {
@@ -1790,8 +1741,12 @@ impl Timeline {
                // delay will be terminated by a timeout regardless.
                let _completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() };

+                // no extra cancellation here, because nothing really waits for this to complete compared
+                // to spawn_ondemand_logical_size_calculation.
+                let cancel = CancellationToken::new();
+
                let calculated_size = match self_clone
-                    .logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx)
+                    .logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx, cancel)
                    .await
                {
                    Ok(s) => s,
@@ -1860,6 +1815,7 @@ impl Timeline {
        lsn: Lsn,
        cause: LogicalSizeCalculationCause,
        ctx: RequestContext,
+        cancel: CancellationToken,
    ) -> oneshot::Receiver<Result<u64, CalculateLogicalSizeError>> {
        let (sender, receiver) = oneshot::channel();
        let self_clone = Arc::clone(self);
@@ -1880,7 +1836,7 @@ impl Timeline {
            false,
            async move {
                let res = self_clone
-                    .logical_size_calculation_task(lsn, cause, &ctx)
+                    .logical_size_calculation_task(lsn, cause, &ctx, cancel)
                    .await;
                let _ = sender.send(res).ok();
                Ok(()) // Receiver is responsible for handling errors
@@ -1896,28 +1852,58 @@ impl Timeline {
        lsn: Lsn,
        cause: LogicalSizeCalculationCause,
        ctx: &RequestContext,
+        cancel: CancellationToken,
    ) -> Result<u64, CalculateLogicalSizeError> {
        span::debug_assert_current_span_has_tenant_and_timeline_id();

-        let _guard = self.gate.enter();
-
+        let mut timeline_state_updates = self.subscribe_for_state_updates();
        let self_calculation = Arc::clone(self);

        let mut calculation = pin!(async {
+            let cancel = cancel.child_token();
            let ctx = ctx.attached_child();
            self_calculation
-                .calculate_logical_size(lsn, cause, &ctx)
+                .calculate_logical_size(lsn, cause, cancel, &ctx)
                .await
        });
+        let timeline_state_cancellation = async {
+            loop {
+                match timeline_state_updates.changed().await {
+                    Ok(()) => {
+                        let new_state = timeline_state_updates.borrow().clone();
+                        match new_state {
+                            // we're running this job for active timelines only
+                            TimelineState::Active => continue,
+                            TimelineState::Broken { .. }
+                            | TimelineState::Stopping
+                            | TimelineState::Loading => {
+                                break format!("aborted because timeline became inactive (new state: {new_state:?})")
+                            }
+                        }
+                    }
+                    Err(_sender_dropped_error) => {
+                        // can't happen, the sender is not dropped as long as the Timeline exists
+                        break "aborted because state watch was dropped".to_string();
+                    }
+                }
+            }
+        };
+
+        let taskmgr_shutdown_cancellation = async {
+            task_mgr::shutdown_watcher().await;
+            "aborted because task_mgr shutdown requested".to_string()
+        };

        tokio::select! {
            res = &mut calculation => { res }
-            _ = self.cancel.cancelled() => {
-                debug!("cancelling logical size calculation for timeline shutdown");
+            reason = timeline_state_cancellation => {
+                debug!(reason = reason, "cancelling calculation");
+                cancel.cancel();
                calculation.await
            }
-            _ = task_mgr::shutdown_watcher() => {
-                debug!("cancelling logical size calculation for task shutdown");
+            reason = taskmgr_shutdown_cancellation => {
+                debug!(reason = reason, "cancelling calculation");
+                cancel.cancel();
                calculation.await
            }
        }
@@ -1931,6 +1917,7 @@ impl Timeline {
        &self,
        up_to_lsn: Lsn,
        cause: LogicalSizeCalculationCause,
+        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> Result<u64, CalculateLogicalSizeError> {
        info!(
@@ -1973,7 +1960,7 @@ impl Timeline {
        };
        let timer = storage_time_metrics.start_timer();
        let logical_size = self
-            .get_current_logical_size_non_incremental(up_to_lsn, ctx)
+            .get_current_logical_size_non_incremental(up_to_lsn, cancel, ctx)
            .await?;
        debug!("calculated logical size: {logical_size}");
        timer.stop_and_record();
@@ -2386,10 +2373,6 @@ impl Timeline {
        info!("started flush loop");
        loop {
            tokio::select! {
-                _ = self.cancel.cancelled() => {
-                    info!("shutting down layer flush task");
-                    break;
-                },
                _ = task_mgr::shutdown_watcher() => {
                    info!("shutting down layer flush task");
                    break;
@@ -2401,14 +2384,6 @@ impl Timeline {
            let timer = self.metrics.flush_time_histo.start_timer();
            let flush_counter = *layer_flush_start_rx.borrow();
            let result = loop {
-                if self.cancel.is_cancelled() {
-                    info!("dropping out of flush loop for timeline shutdown");
-                    // Note: we do not bother transmitting into [`layer_flush_done_tx`], because
-                    // anyone waiting on that will respect self.cancel as well: they will stop
-                    // waiting at the same time we as drop out of this loop.
-                    return;
-                }
-
                let layer_to_flush = {
                    let guard = self.layers.read().await;
                    guard.layer_map().frozen_layers.front().cloned()
@@ -2417,18 +2392,9 @@ impl Timeline {
                let Some(layer_to_flush) = layer_to_flush else {
                    break Ok(());
                };
-                match self.flush_frozen_layer(layer_to_flush, ctx).await {
-                    Ok(()) => {}
-                    Err(FlushLayerError::Cancelled) => {
-                        info!("dropping out of flush loop for timeline shutdown");
-                        return;
-                    }
-                    err @ Err(
-                        FlushLayerError::Other(_) | FlushLayerError::PageReconstructError(_),
-                    ) => {
-                        error!("could not flush frozen layer: {err:?}");
-                        break err;
-                    }
+                if let Err(err) = self.flush_frozen_layer(layer_to_flush, ctx).await {
+                    error!("could not flush frozen layer: {err:?}");
+                    break Err(err);
                }
            };
            // Notify any listeners that we're done
@@ -2477,17 +2443,7 @@ impl Timeline {
                }
            }
            trace!("waiting for flush to complete");
-            tokio::select! {
-                rx_e = rx.changed() => {
-                    rx_e?;
-                },
-                // Cancellation safety: we are not leaving an I/O in-flight for the flush, we're just ignoring
-                // the notification from [`flush_loop`] that it completed.
-                _ = self.cancel.cancelled() => {
-                    tracing::info!("Cancelled layer flush due on timeline shutdown");
-                    return Ok(())
-                }
-            };
+            rx.changed().await?;
            trace!("done")
        }
    }
@@ -2502,7 +2458,7 @@ impl Timeline {
        self: &Arc<Self>,
        frozen_layer: Arc<InMemoryLayer>,
        ctx: &RequestContext,
-    ) -> Result<(), FlushLayerError> {
+    ) -> anyhow::Result<()> {
        // As a special case, when we have just imported an image into the repository,
        // instead of writing out a L0 delta layer, we directly write out image layer
        // files instead. This is possible as long as *all* the data imported into the
@@ -2527,11 +2483,6 @@ impl Timeline {
                let (partitioning, _lsn) = self
                    .repartition(self.initdb_lsn, self.get_compaction_target_size(), ctx)
                    .await?;
-
-                if self.cancel.is_cancelled() {
-                    return Err(FlushLayerError::Cancelled);
-                }
-
                // For image layers, we add them immediately into the layer map.
                (
                    self.create_image_layers(&partitioning, self.initdb_lsn, true, ctx)
@@ -2563,10 +2514,6 @@ impl Timeline {
                )
            };

-        if self.cancel.is_cancelled() {
-            return Err(FlushLayerError::Cancelled);
-        }
-
        let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
        let old_disk_consistent_lsn = self.disk_consistent_lsn.load();

@@ -2576,10 +2523,6 @@ impl Timeline {
        let metadata = {
            let mut guard = self.layers.write().await;

-            if self.cancel.is_cancelled() {
-                return Err(FlushLayerError::Cancelled);
-            }
-
            guard.finish_flush_l0_layer(delta_layer_to_add.as_ref(), &frozen_layer, &self.metrics);

            if disk_consistent_lsn != old_disk_consistent_lsn {
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -326,7 +326,8 @@ impl Timeline {
        match state.last_layer_access_imitation {
            Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }
            _ => {
-                self.imitate_timeline_cached_layer_accesses(ctx).await;
+                self.imitate_timeline_cached_layer_accesses(cancel, ctx)
+                    .await;
                state.last_layer_access_imitation = Some(tokio::time::Instant::now())
            }
        }
@@ -366,12 +367,21 @@ impl Timeline {

    /// Recompute the values which would cause on-demand downloads during restart.
    #[instrument(skip_all)]
-    async fn imitate_timeline_cached_layer_accesses(&self, ctx: &RequestContext) {
+    async fn imitate_timeline_cached_layer_accesses(
+        &self,
+        cancel: &CancellationToken,
+        ctx: &RequestContext,
+    ) {
        let lsn = self.get_last_record_lsn();

        // imitiate on-restart initial logical size
        let size = self
-            .calculate_logical_size(lsn, LogicalSizeCalculationCause::EvictionTaskImitation, ctx)
+            .calculate_logical_size(
+                lsn,
+                LogicalSizeCalculationCause::EvictionTaskImitation,
+                cancel.clone(),
+                ctx,
+            )
            .instrument(info_span!("calculate_logical_size"))
            .await;

--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -44,6 +44,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
 use crate::config::PageServerConf;
 use crate::metrics::{
    WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
+    WAL_REDO_WAIT_TIME,
 };
 use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
 use crate::repository::Key;
@@ -206,8 +207,11 @@ impl PostgresRedoManager {
    ) -> anyhow::Result<Bytes> {
        let (rel, blknum) = key_to_rel_block(key).context("invalid record")?;
        const MAX_RETRY_ATTEMPTS: u32 = 1;
+        let start_time = Instant::now();
        let mut n_attempts = 0u32;
        loop {
+            let lock_time = Instant::now();
+
            // launch the WAL redo process on first use
            let proc: Arc<WalRedoProcess> = {
                let proc_guard = self.redo_process.read().unwrap();
@@ -232,7 +236,7 @@ impl PostgresRedoManager {
                }
            };

-            let started_at = std::time::Instant::now();
+            WAL_REDO_WAIT_TIME.observe(lock_time.duration_since(start_time).as_secs_f64());

            // Relational WAL records are applied using wal-redo-postgres
            let buf_tag = BufferTag { rel, blknum };
@@ -240,7 +244,8 @@ impl PostgresRedoManager {
                .apply_wal_records(buf_tag, &base_img, records, wal_redo_timeout)
                .context("apply_wal_records");

-            let duration = started_at.elapsed();
+            let end_time = Instant::now();
+            let duration = end_time.duration_since(lock_time);

            let len = records.len();
            let nbytes = records.iter().fold(0, |acumulator, record| {
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -33,6 +33,7 @@ typedef enum
 	T_NeonNblocksRequest,
 	T_NeonGetPageRequest,
 	T_NeonDbSizeRequest,
+	T_NeonPagestoreNoopRequest,

 	/* pagestore -> pagestore_client */
 	T_NeonExistsResponse = 100,
@@ -40,6 +41,7 @@ typedef enum
 	T_NeonGetPageResponse,
 	T_NeonErrorResponse,
 	T_NeonDbSizeResponse,
+	T_NeonPagestoreNoopResponse,
 }			NeonMessageTag;

 /* base struct for c-style inheritance */
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -80,9 +80,6 @@ struct ProxyCliArgs {
    /// cache for `wake_compute` api method (use `size=0` to disable)
    #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)]
    wake_compute_cache: String,
-    /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
-    wake_compute_lock: String,
    /// Allow self-signed certificates for compute nodes (for testing)
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    allow_self_signed_compute: bool,
@@ -223,23 +220,10 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
                node_info: console::caches::NodeInfoCache::new("node_info_cache", size, ttl),
            }));

-            let config::WakeComputeLockOptions {
-                shards,
-                permits,
-                epoch,
-                timeout,
-            } = args.wake_compute_lock.parse()?;
-            info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
-            let locks = Box::leak(Box::new(
-                console::locks::ApiLocks::new("wake_compute_lock", permits, shards, timeout)
-                    .unwrap(),
-            ));
-            tokio::spawn(locks.garbage_collect_worker(epoch));
-
            let url = args.auth_endpoint.parse()?;
            let endpoint = http::Endpoint::new(url, http::new_client());

-            let api = console::provider::neon::Api::new(endpoint, caches, locks);
+            let api = console::provider::neon::Api::new(endpoint, caches);
            auth::BackendType::Console(Cow::Owned(api), ())
        }
        AuthBackend::Postgres => {
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -264,79 +264,6 @@ impl FromStr for CacheOptions {
    }
 }

-/// Helper for cmdline cache options parsing.
-pub struct WakeComputeLockOptions {
-    /// The number of shards the lock map should have
-    pub shards: usize,
-    /// The number of allowed concurrent requests for each endpoitn
-    pub permits: usize,
-    /// Garbage collection epoch
-    pub epoch: Duration,
-    /// Lock timeout
-    pub timeout: Duration,
-}
-
-impl WakeComputeLockOptions {
-    /// Default options for [`crate::console::provider::ApiLocks`].
-    pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
-
-    // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "shards=32,permits=4,epoch=10m,timeout=1s";
-
-    /// Parse lock options passed via cmdline.
-    /// Example: [`Self::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK`].
-    fn parse(options: &str) -> anyhow::Result<Self> {
-        let mut shards = None;
-        let mut permits = None;
-        let mut epoch = None;
-        let mut timeout = None;
-
-        for option in options.split(',') {
-            let (key, value) = option
-                .split_once('=')
-                .with_context(|| format!("bad key-value pair: {option}"))?;
-
-            match key {
-                "shards" => shards = Some(value.parse()?),
-                "permits" => permits = Some(value.parse()?),
-                "epoch" => epoch = Some(humantime::parse_duration(value)?),
-                "timeout" => timeout = Some(humantime::parse_duration(value)?),
-                unknown => bail!("unknown key: {unknown}"),
-            }
-        }
-
-        // these dont matter if lock is disabled
-        if let Some(0) = permits {
-            timeout = Some(Duration::default());
-            epoch = Some(Duration::default());
-            shards = Some(2);
-        }
-
-        let out = Self {
-            shards: shards.context("missing `shards`")?,
-            permits: permits.context("missing `permits`")?,
-            epoch: epoch.context("missing `epoch`")?,
-            timeout: timeout.context("missing `timeout`")?,
-        };
-
-        ensure!(out.shards > 1, "shard count must be > 1");
-        ensure!(
-            out.shards.is_power_of_two(),
-            "shard count must be a power of two"
-        );
-
-        Ok(out)
-    }
-}
-
-impl FromStr for WakeComputeLockOptions {
-    type Err = anyhow::Error;
-
-    fn from_str(options: &str) -> Result<Self, Self::Err> {
-        let error = || format!("failed to parse cache lock options '{options}'");
-        Self::parse(options).with_context(error)
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -361,42 +288,4 @@ mod tests {

        Ok(())
    }
-
-    #[test]
-    fn test_parse_lock_options() -> anyhow::Result<()> {
-        let WakeComputeLockOptions {
-            epoch,
-            permits,
-            shards,
-            timeout,
-        } = "shards=32,permits=4,epoch=10m,timeout=1s".parse()?;
-        assert_eq!(epoch, Duration::from_secs(10 * 60));
-        assert_eq!(timeout, Duration::from_secs(1));
-        assert_eq!(shards, 32);
-        assert_eq!(permits, 4);
-
-        let WakeComputeLockOptions {
-            epoch,
-            permits,
-            shards,
-            timeout,
-        } = "epoch=60s,shards=16,timeout=100ms,permits=8".parse()?;
-        assert_eq!(epoch, Duration::from_secs(60));
-        assert_eq!(timeout, Duration::from_millis(100));
-        assert_eq!(shards, 16);
-        assert_eq!(permits, 8);
-
-        let WakeComputeLockOptions {
-            epoch,
-            permits,
-            shards,
-            timeout,
-        } = "permits=0".parse()?;
-        assert_eq!(epoch, Duration::ZERO);
-        assert_eq!(timeout, Duration::ZERO);
-        assert_eq!(shards, 2);
-        assert_eq!(permits, 0);
-
-        Ok(())
-    }
 }
--- a/proxy/src/console.rs
+++ b/proxy/src/console.rs
@@ -13,10 +13,5 @@ pub mod caches {
    pub use super::provider::{ApiCaches, NodeInfoCache};
 }

-/// Various cache-related types.
-pub mod locks {
-    pub use super::provider::ApiLocks;
-}
-
 /// Console's management API.
 pub mod mgmt;
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -8,13 +8,7 @@ use crate::{
    compute, scram,
 };
 use async_trait::async_trait;
-use dashmap::DashMap;
-use std::{sync::Arc, time::Duration};
-use tokio::{
-    sync::{OwnedSemaphorePermit, Semaphore},
-    time::Instant,
-};
-use tracing::info;
+use std::sync::Arc;

 pub mod errors {
    use crate::{
@@ -155,9 +149,6 @@ pub mod errors {

        #[error(transparent)]
        ApiError(ApiError),
-
-        #[error("Timeout waiting to acquire wake compute lock")]
-        TimeoutError,
    }

    // This allows more useful interactions than `#[from]`.
@@ -167,17 +158,6 @@ pub mod errors {
        }
    }

-    impl From<tokio::sync::AcquireError> for WakeComputeError {
-        fn from(_: tokio::sync::AcquireError) -> Self {
-            WakeComputeError::TimeoutError
-        }
-    }
-    impl From<tokio::time::error::Elapsed> for WakeComputeError {
-        fn from(_: tokio::time::error::Elapsed) -> Self {
-            WakeComputeError::TimeoutError
-        }
-    }
-
    impl UserFacingError for WakeComputeError {
        fn to_string_client(&self) -> String {
            use WakeComputeError::*;
@@ -187,8 +167,6 @@ pub mod errors {
                BadComputeAddress(_) => REQUEST_FAILED.to_owned(),
                // However, API might return a meaningful error.
                ApiError(e) => e.to_string_client(),
-
-                TimeoutError => "timeout while acquiring the compute resource lock".to_owned(),
            }
        }
    }
@@ -255,145 +233,3 @@ pub struct ApiCaches {
    /// Cache for the `wake_compute` API method.
    pub node_info: NodeInfoCache,
 }
-
-/// Various caches for [`console`](super).
-pub struct ApiLocks {
-    name: &'static str,
-    node_locks: DashMap<Arc<str>, Arc<Semaphore>>,
-    permits: usize,
-    timeout: Duration,
-    registered: prometheus::IntCounter,
-    unregistered: prometheus::IntCounter,
-    reclamation_lag: prometheus::Histogram,
-    lock_acquire_lag: prometheus::Histogram,
-}
-
-impl ApiLocks {
-    pub fn new(
-        name: &'static str,
-        permits: usize,
-        shards: usize,
-        timeout: Duration,
-    ) -> prometheus::Result<Self> {
-        let registered = prometheus::IntCounter::with_opts(
-            prometheus::Opts::new(
-                "semaphores_registered",
-                "Number of semaphores registered in this api lock",
-            )
-            .namespace(name),
-        )?;
-        prometheus::register(Box::new(registered.clone()))?;
-        let unregistered = prometheus::IntCounter::with_opts(
-            prometheus::Opts::new(
-                "semaphores_unregistered",
-                "Number of semaphores unregistered in this api lock",
-            )
-            .namespace(name),
-        )?;
-        prometheus::register(Box::new(unregistered.clone()))?;
-        let reclamation_lag = prometheus::Histogram::with_opts(
-            prometheus::HistogramOpts::new(
-                "reclamation_lag_seconds",
-                "Time it takes to reclaim unused semaphores in the api lock",
-            )
-            .namespace(name)
-            // 1us -> 65ms
-            // benchmarks on my mac indicate it's usually in the range of 256us and 512us
-            .buckets(prometheus::exponential_buckets(1e-6, 2.0, 16)?),
-        )?;
-        prometheus::register(Box::new(reclamation_lag.clone()))?;
-        let lock_acquire_lag = prometheus::Histogram::with_opts(
-            prometheus::HistogramOpts::new(
-                "semaphore_acquire_seconds",
-                "Time it takes to reclaim unused semaphores in the api lock",
-            )
-            .namespace(name)
-            // 0.1ms -> 6s
-            .buckets(prometheus::exponential_buckets(1e-4, 2.0, 16)?),
-        )?;
-        prometheus::register(Box::new(lock_acquire_lag.clone()))?;
-
-        Ok(Self {
-            name,
-            node_locks: DashMap::with_shard_amount(shards),
-            permits,
-            timeout,
-            lock_acquire_lag,
-            registered,
-            unregistered,
-            reclamation_lag,
-        })
-    }
-
-    pub async fn get_wake_compute_permit(
-        &self,
-        key: &Arc<str>,
-    ) -> Result<WakeComputePermit, errors::WakeComputeError> {
-        if self.permits == 0 {
-            return Ok(WakeComputePermit { permit: None });
-        }
-        let now = Instant::now();
-        let semaphore = {
-            // get fast path
-            if let Some(semaphore) = self.node_locks.get(key) {
-                semaphore.clone()
-            } else {
-                self.node_locks
-                    .entry(key.clone())
-                    .or_insert_with(|| {
-                        self.registered.inc();
-                        Arc::new(Semaphore::new(self.permits))
-                    })
-                    .clone()
-            }
-        };
-        let permit = tokio::time::timeout_at(now + self.timeout, semaphore.acquire_owned()).await;
-
-        self.lock_acquire_lag
-            .observe((Instant::now() - now).as_secs_f64());
-
-        Ok(WakeComputePermit {
-            permit: Some(permit??),
-        })
-    }
-
-    pub async fn garbage_collect_worker(&self, epoch: std::time::Duration) {
-        if self.permits == 0 {
-            return;
-        }
-
-        let mut interval = tokio::time::interval(epoch / (self.node_locks.shards().len()) as u32);
-        loop {
-            for (i, shard) in self.node_locks.shards().iter().enumerate() {
-                interval.tick().await;
-                // temporary lock a single shard and then clear any semaphores that aren't currently checked out
-                // race conditions: if strong_count == 1, there's no way that it can increase while the shard is locked
-                // therefore releasing it is safe from race conditions
-                info!(
-                    name = self.name,
-                    shard = i,
-                    "performing epoch reclamation on api lock"
-                );
-                let mut lock = shard.write();
-                let timer = self.reclamation_lag.start_timer();
-                let count = lock
-                    .extract_if(|_, semaphore| Arc::strong_count(semaphore.get_mut()) == 1)
-                    .count();
-                drop(lock);
-                self.unregistered.inc_by(count as u64);
-                timer.observe_duration()
-            }
-        }
-    }
-}
-
-pub struct WakeComputePermit {
-    // None if the lock is disabled
-    permit: Option<OwnedSemaphorePermit>,
-}
-
-impl WakeComputePermit {
-    pub fn should_check_cache(&self) -> bool {
-        self.permit.is_some()
-    }
-}
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -3,12 +3,12 @@
 use super::{
    super::messages::{ConsoleError, GetRoleSecret, WakeCompute},
    errors::{ApiError, GetAuthInfoError, WakeComputeError},
-    ApiCaches, ApiLocks, AuthInfo, CachedNodeInfo, ConsoleReqExtra, NodeInfo,
+    ApiCaches, AuthInfo, CachedNodeInfo, ConsoleReqExtra, NodeInfo,
 };
 use crate::{auth::ClientCredentials, compute, http, scram};
 use async_trait::async_trait;
 use futures::TryFutureExt;
-use std::{net::SocketAddr, sync::Arc};
+use std::net::SocketAddr;
 use tokio::time::Instant;
 use tokio_postgres::config::SslMode;
 use tracing::{error, info, info_span, warn, Instrument};
@@ -17,17 +17,12 @@ use tracing::{error, info, info_span, warn, Instrument};
 pub struct Api {
    endpoint: http::Endpoint,
    caches: &'static ApiCaches,
-    locks: &'static ApiLocks,
    jwt: String,
 }

 impl Api {
    /// Construct an API object containing the auth parameters.
-    pub fn new(
-        endpoint: http::Endpoint,
-        caches: &'static ApiCaches,
-        locks: &'static ApiLocks,
-    ) -> Self {
+    pub fn new(endpoint: http::Endpoint, caches: &'static ApiCaches) -> Self {
        let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
            Ok(v) => v,
            Err(_) => "".to_string(),
@@ -35,7 +30,6 @@ impl Api {
        Self {
            endpoint,
            caches,
-            locks,
            jwt,
        }
    }
@@ -169,22 +163,9 @@ impl super::Api for Api {
            return Ok(cached);
        }

-        let key: Arc<str> = key.into();
-
-        let permit = self.locks.get_wake_compute_permit(&key).await?;
-
-        // after getting back a permit - it's possible the cache was filled
-        // double check
-        if permit.should_check_cache() {
-            if let Some(cached) = self.caches.node_info.get(&key) {
-                info!(key = &*key, "found cached compute node info");
-                return Ok(cached);
-            }
-        }
-
        let node = self.do_wake_compute(extra, creds).await?;
-        let (_, cached) = self.caches.node_info.insert(key.clone(), node);
-        info!(key = &*key, "created a cache entry for compute node info");
+        let (_, cached) = self.caches.node_info.insert(key.into(), node);
+        info!(key = key, "created a cache entry for compute node info");

        Ok(cached)
    }
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -570,7 +570,6 @@ fn report_error(e: &WakeComputeError, retry: bool) {
            "api_console_other_server_error"
        }
        WakeComputeError::ApiError(ApiError::Console { .. }) => "api_console_other_error",
-        WakeComputeError::TimeoutError => "timeout_error",
    };
    NUM_WAKEUP_FAILURES.with_label_values(&[retry, kind]).inc();
 }
--- a/safekeeper/src/auth.rs
+++ b/safekeeper/src/auth.rs
@@ -1,20 +1,19 @@
-use utils::auth::{AuthError, Claims, Scope};
+use anyhow::{bail, Result};
+use utils::auth::{Claims, Scope};
 use utils::id::TenantId;

-pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<(), AuthError> {
+pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<()> {
    match (&claims.scope, tenant_id) {
-        (Scope::Tenant, None) => Err(AuthError(
-            "Attempt to access management api with tenant scope. Permission denied".into(),
-        )),
+        (Scope::Tenant, None) => {
+            bail!("Attempt to access management api with tenant scope. Permission denied")
+        }
        (Scope::Tenant, Some(tenant_id)) => {
            if claims.tenant_id.unwrap() != tenant_id {
-                return Err(AuthError("Tenant id mismatch. Permission denied".into()));
+                bail!("Tenant id mismatch. Permission denied")
            }
            Ok(())
        }
-        (Scope::PageServerApi, _) => Err(AuthError(
-            "PageServerApi scope makes no sense for Safekeeper".into(),
-        )),
+        (Scope::PageServerApi, _) => bail!("PageServerApi scope makes no sense for Safekeeper"),
        (Scope::SafekeeperData, _) => Ok(()),
    }
 }
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -6,7 +6,7 @@ use std::str::FromStr;
 use std::str::{self};
 use std::sync::Arc;
 use tokio::io::{AsyncRead, AsyncWrite};
-use tracing::{debug, info, info_span, Instrument};
+use tracing::{info, info_span, Instrument};

 use crate::auth::check_permission;
 use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
@@ -165,27 +165,26 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
            .auth
            .as_ref()
            .expect("auth_type is configured but .auth of handler is missing");
-        let data = auth
-            .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)
-            .map_err(|e| QueryError::Unauthorized(e.0))?;
+        let data =
+            auth.decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?;

        // The handler might be configured to allow only tenant scope tokens.
        if matches!(allowed_auth_scope, Scope::Tenant)
            && !matches!(data.claims.scope, Scope::Tenant)
        {
-            return Err(QueryError::Unauthorized(
-                "passed JWT token is for full access, but only tenant scope is allowed".into(),
-            ));
+            return Err(QueryError::Other(anyhow::anyhow!(
+                "passed JWT token is for full access, but only tenant scope is allowed"
+            )));
        }

        if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {
-            return Err(QueryError::Unauthorized(
-                "jwt token scope is Tenant, but tenant id is missing".into(),
-            ));
+            return Err(QueryError::Other(anyhow::anyhow!(
+                "jwt token scope is Tenant, but tenant id is missing"
+            )));
        }

-        debug!(
-            "jwt scope check succeeded for scope: {:#?} by tenant id: {:?}",
+        info!(
+            "jwt auth succeeded for scope: {:#?} by tenant id: {:?}",
            data.claims.scope, data.claims.tenant_id,
        );

@@ -264,7 +263,7 @@ impl SafekeeperPostgresHandler {

    // when accessing management api supply None as an argument
    // when using to authorize tenant pass corresponding tenant id
-    fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<(), QueryError> {
+    fn check_permission(&self, tenant_id: Option<TenantId>) -> anyhow::Result<()> {
        if self.auth.is_none() {
            // auth is set to Trust, nothing to check so just return ok
            return Ok(());
@@ -276,7 +275,7 @@ impl SafekeeperPostgresHandler {
            .claims
            .as_ref()
            .expect("claims presence already checked");
-        check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0))
+        check_permission(claims, tenant_id)
    }

    async fn handle_timeline_status<IO: AsyncRead + AsyncWrite + Unpin>(
--- a/test_runner/duplicate_tenant.py
+++ b/test_runner/duplicate_tenant.py
@@ -0,0 +1,43 @@
+# Usage from top of repo:
+#  poetry run python3 test_runner/duplicate_tenant.py b97965931096047b2d54958756baee7b 10
+from queue import Queue
+import sys
+import threading
+
+import requests
+from fixtures.pageserver.http import PageserverHttpClient
+from fixtures.types import TenantId
+
+initial_tenant = sys.argv[1]
+ncopies = int(sys.argv[2])
+numthreads = int(sys.argv[3])
+
+
+# class DuckTypedNeonEnv:
+#     pass
+
+
+# cli = NeonCli(DuckTypedNeonEnv())
+
+q = Queue()
+for i in range(0, ncopies):
+    q.put(i)
+
+for i in range(0, numthreads):
+    q.put(None)
+
+
+def create():
+    while True:
+        if q.get() == None:
+            break
+        new_tenant = TenantId.generate()
+        res = requests.post(
+            f"http://localhost:9898/v1/tenant/{initial_tenant}/duplicate",
+            json={"new_tenant_id": str(new_tenant)},
+        )
+        res.raise_for_status()
+
+
+for i in range(0, numthreads):
+    threading.Thread(target=create).start()
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -219,6 +219,25 @@ class PageserverHttpClient(requests.Session):
        assert isinstance(new_tenant_id, str)
        return TenantId(new_tenant_id)

+    def tenant_duplicate(
+        self, src_tenant_id: TenantId, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None
+    ) -> TenantId:
+        if conf is not None:
+            assert "new_tenant_id" not in conf.keys()
+        res = self.post(
+            f"http://localhost:{self.port}/v1/tenant/{src_tenant_id}/duplicate",
+            json={
+                "new_tenant_id": str(new_tenant_id),
+                **(conf or {}),
+            },
+        )
+        self.verbose_error(res)
+        if res.status_code == 409:
+            raise Exception(f"could not create tenant: already exists for id {new_tenant_id}")
+        new_tenant_id = res.json()
+        assert isinstance(new_tenant_id, str)
+        return TenantId(new_tenant_id)
+
    def tenant_attach(
        self,
        tenant_id: TenantId,
--- a/test_runner/regress/test_auth.py
+++ b/test_runner/regress/test_auth.py
@@ -25,7 +25,7 @@ def assert_client_authorized(env: NeonEnv, http_client: PageserverHttpClient):
 def assert_client_not_authorized(env: NeonEnv, http_client: PageserverHttpClient):
    with pytest.raises(
        PageserverApiException,
-        match="Forbidden: JWT authentication error",
+        match="Unauthorized: malformed jwt token",
    ):
        assert_client_authorized(env, http_client)

@@ -56,7 +56,9 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
    assert_client_authorized(env, pageserver_http_client)

    # fail to create branch using token with different tenant_id
-    with pytest.raises(PageserverApiException, match="Forbidden: JWT authentication error"):
+    with pytest.raises(
+        PageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
+    ):
        assert_client_authorized(env, invalid_tenant_http_client)

    # create tenant using management token
@@ -65,7 +67,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
    # fail to create tenant using tenant token
    with pytest.raises(
        PageserverApiException,
-        match="Forbidden: JWT authentication error",
+        match="Forbidden: Attempt to access management api with tenant scope. Permission denied",
    ):
        tenant_http_client.tenant_create(TenantId.generate())

@@ -92,7 +94,6 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.auth_enabled = True
    env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")

    pageserver_token_old = env.auth_keys.generate_pageserver_token()
@@ -145,7 +146,6 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.auth_enabled = True
    env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")

    pageserver_token_old = env.auth_keys.generate_pageserver_token()
@@ -162,7 +162,7 @@ def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
    # Next attempt fails as we use the old auth token
    with pytest.raises(
        PageserverApiException,
-        match="Forbidden: JWT authentication error",
+        match="Unauthorized: malformed jwt token",
    ):
        pageserver_http_client_old.reload_auth_validation_keys()

--- a/test_runner/regress/test_bad_connection.py
+++ b/test_runner/regress/test_bad_connection.py
@@ -1,60 +0,0 @@
-import random
-import time
-
-from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder
-
-
-def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
-    env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*simulated connection error.*")
-
-    pageserver_http = env.pageserver.http_client()
-    env.neon_cli.create_branch("test_compute_pageserver_connection_stress")
-    endpoint = env.endpoints.create_start("test_compute_pageserver_connection_stress")
-
-    # Enable failpoint after starting everything else up so that loading initial
-    # basebackup doesn't fail
-    pageserver_http.configure_failpoints(("simulated-bad-compute-connection", "50%return(15)"))
-
-    pg_conn = endpoint.connect()
-    cur = pg_conn.cursor()
-
-    # Create table, and insert some rows. Make it big enough that it doesn't fit in
-    # shared_buffers, otherwise the SELECT after restart will just return answer
-    # from shared_buffers without hitting the page server, which defeats the point
-    # of this test.
-    cur.execute("CREATE TABLE foo (t text)")
-    cur.execute(
-        """
-        INSERT INTO foo
-            SELECT 'long string to consume some space' || g
-            FROM generate_series(1, 100000) g
-        """
-    )
-
-    # Verify that the table is larger than shared_buffers
-    cur.execute(
-        """
-        select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size
-        from pg_settings where name = 'shared_buffers'
-        """
-    )
-    row = cur.fetchone()
-    assert row is not None
-    log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
-    assert int(row[0]) < int(row[1])
-
-    cur.execute("SELECT count(*) FROM foo")
-    assert cur.fetchone() == (100000,)
-
-    end_time = time.time() + 30
-    times_executed = 0
-    while time.time() < end_time:
-        if random.random() < 0.5:
-            cur.execute("INSERT INTO foo VALUES ('stas'), ('heikki')")
-        else:
-            cur.execute("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10")
-            cur.fetchall()
-        times_executed += 1
-    log.info(f"Workload executed {times_executed} times")
--- a/test_runner/regress/test_broken_timeline.py
+++ b/test_runner/regress/test_broken_timeline.py
@@ -26,7 +26,6 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
            ".*will not become active. Current state: Broken.*",
            ".*failed to load metadata.*",
            ".*load failed.*load local timeline.*",
-            ".*layer loading failed permanently: load layer: .*",
        ]
    )

--- a/test_runner/regress/test_tenant_duplicate.py
+++ b/test_runner/regress/test_tenant_duplicate.py
@@ -0,0 +1,54 @@
+import time
+from fixtures.neon_fixtures import (
+    NeonEnvBuilder,
+    last_flush_lsn_upload,
+)
+from fixtures.remote_storage import (
+    RemoteStorageKind,
+)
+from fixtures.types import TenantId
+from fixtures.log_helper import log
+
+
+def test_tenant_duplicate(
+    neon_env_builder: NeonEnvBuilder,
+):
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+
+    env = neon_env_builder.init_start()
+
+    with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as ep_main:
+        ep_main.safe_psql("CREATE TABLE foo (i int);")
+        ep_main.safe_psql("INSERT INTO foo VALUES (1), (2), (3);")
+        last_flush_lsn = last_flush_lsn_upload(
+            env, ep_main, env.initial_tenant, env.initial_timeline
+        )
+
+    new_tenant_id = TenantId.generate()
+    # timeline id remains unchanged with tenant_duplicate
+    # TODO: implement a remapping scheme so timeline ids remain globally unique
+    new_timeline_id = env.initial_timeline
+
+    log.info(f"Duplicate tenant/timeline will be: {new_tenant_id}/{new_timeline_id}")
+
+    ps_http = env.pageserver.http_client()
+
+    ps_http.tenant_duplicate(env.initial_tenant, new_tenant_id)
+
+    ps_http.tenant_delete(env.initial_tenant)
+
+    env.neon_cli.map_branch("duplicate", new_tenant_id, new_timeline_id)
+
+    # start read-only replicate and validate
+    with env.endpoints.create_start(
+        "duplicate", tenant_id=new_tenant_id, lsn=last_flush_lsn
+    ) as ep_dup:
+        with ep_dup.connect() as conn:
+            with conn.cursor() as cur:
+                cur.execute("SELECT * FROM foo ORDER BY i;")
+                cur.fetchall() == [(1,), (2,), (3,)]
+
+    # ensure restarting PS works
+    env.pageserver.stop()
+    env.pageserver.start()
+
--- a/trace/src/main.rs
+++ b/trace/src/main.rs
@@ -74,6 +74,7 @@ fn analyze_trace<R: std::io::Read>(mut reader: R) {
                prev = Some(req);
            }
            PagestreamFeMessage::DbSize(_) => {}
+            PagestreamFeMessage::NoOp => {},
        };
    }

--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -25,7 +25,6 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd
 clap = { version = "4", features = ["derive", "string"] }
 clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] }
 crossbeam-utils = { version = "0.8" }
-dashmap = { version = "5", default-features = false, features = ["raw-api"] }
 either = { version = "1" }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
 futures = { version = "0.3" }
Author	SHA1	Message	Date
Christian Schwarz	6ebdf2784f	pq client: renames	2023-11-08 16:53:06 +00:00
Christian Schwarz	0fd45db92e	implement a benchmark for tokio tcp handling to figure out perf bottleneck in no_op libpq benchmark	2023-11-08 16:49:03 +00:00
Christian Schwarz	962af0ad6c	implement a standalone no-op server usable by getpage_bench_libpq by running it on a different port than the pageserver libpq listener, and overriding connstring for getpage_bench_libpq to point to the noop_server	2023-11-08 16:49:03 +00:00
Christian Schwarz	192bbd6c54	pq bench: configurable connection string	2023-11-08 16:49:03 +00:00
Christian Schwarz	0035cf840e	getpage_bench_libpq: support for the no-op mode	2023-11-08 16:49:03 +00:00
Christian Schwarz	2b01d661bd	define no-op request/response type inside pagestream & impl server side	2023-11-08 16:48:25 +00:00
Christian Schwarz	f51e608193	pq bench: avoid repeated conversion to_i128	2023-11-08 16:34:56 +00:00
Christian Schwarz	78a28f787c	per-second RPS	2023-11-08 16:34:56 +00:00
Christian Schwarz	001a0e4006	pq bench: proper shutdown	2023-11-08 16:34:56 +00:00
Christian Schwarz	daa2ea7ebe	http bench: sligthly improved stats	2023-11-08 16:34:56 +00:00
Christian Schwarz	37e8eba57f	WIP: libpq-based client depends on https://github.com/neondatabase/rust-postgres/pull/25	2023-11-08 16:34:56 +00:00
Christian Schwarz	f45882ef3c	rename getpage_bench to getpage_bench_http	2023-11-08 16:34:56 +00:00
Christian Schwarz	d16d02d61d	WIP: benchmark that does random getpage requests against the keyspace backup of pageserver.toml d =1 pg_distrib_dir ='/home/admin/neon-main/pg_install' http_auth_type ='Trust' pg_auth_type ='Trust' listen_http_addr ='127.0.0.1:9898' listen_pg_addr ='127.0.0.1:64000' broker_endpoint ='http://127.0.0.1:50051/' #control_plane_api ='http://127.0.0.1:1234/' # Initial configuration file created by 'pageserver --init' #listen_pg_addr = '127.0.0.1:64000' #listen_http_addr = '127.0.0.1:9898' #wait_lsn_timeout = '60 s' #wal_redo_timeout = '60 s' #max_file_descriptors = 10000 #page_cache_size = 160000 # initial superuser role name to use when creating a new tenant #initial_superuser_name = 'cloud_admin' #broker_endpoint = 'http://127.0.0.1:50051' #log_format = 'plain' #concurrent_tenant_size_logical_size_queries = '1' #metric_collection_interval = '10 min' #cached_metric_collection_interval = '0s' #synthetic_size_calculation_interval = '10 min' #disk_usage_based_eviction = { max_usage_pct = .., min_avail_bytes = .., period = "10s"} #background_task_maximum_delay = '10s' [tenant_config] #checkpoint_distance = 268435456 # in bytes #checkpoint_timeout = 10 m #compaction_target_size = 134217728 # in bytes #compaction_period = '20 s' #compaction_threshold = 10 #gc_period = '1 hr' #gc_horizon = 67108864 #image_creation_threshold = 3 #pitr_interval = '7 days' #min_resident_size_override = .. # in bytes #evictions_low_residence_duration_metric_threshold = '24 hour' #gc_feedback = false # make it determinsitic gc_period = '0s' checkpoint_timeout = '3650 day' compaction_period = '20 s' compaction_threshold = 10 compaction_target_size = 134217728 checkpoint_distance = 268435456 image_creation_threshold = 3 [remote_storage] local_path = '/home/admin/neon-main/bench_repo_dir/repo/remote_storage_local_fs'	2023-11-08 16:34:56 +00:00
Christian Schwarz	947f6d9491	API to duplicate a tenant	2023-11-08 16:34:56 +00:00