From 56171cbe8c2b81ba2b949a5ec39c11991fb5e47a Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 2 Feb 2024 14:14:42 +0000
Subject: [PATCH] pageserver: more permissive activation timeout when testing (#6564)

## Problem

The 5 second activation timeout is appropriate for production environments, where we want to give a prompt response to the cloud control plane, and if we fail it will retry the call.

In tests however, we don't want every call to e.g. timeline create to have to come with a retry wrapper.

This issue has always been there, but it is more apparent in sharding tests that concurrently attach several tenant shards.

Closes: https://github.com/neondatabase/neon/issues/6563

## Summary of changes

When `testing` feature is enabled, make `ACTIVE_TENANT_TIMEOUT` 30 seconds instead of 5 seconds.
---
 pageserver/src/http/routes.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 88c36e8595..57ee746726 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -79,8 +79,14 @@ use utils::{
 // For APIs that require an Active tenant, how long should we block waiting for that state?
 // This is not functionally necessary (clients will retry), but avoids generating a lot of
 // failed API calls while tenants are activating.
+#[cfg(not(feature = "testing"))]
 const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);
 
+// Tests run on slow/oversubscribed nodes, and may need to wait much longer for tenants to
+// finish attaching, if calls to remote storage are slow.
+#[cfg(feature = "testing")]
+const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
+
 pub struct State {
     conf: &'static PageServerConf,
     tenant_manager: Arc<TenantManager>,