diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 6b549e198b..f4235aa5bf 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -338,9 +338,13 @@ impl ComputeNode { let lsn = match spec.mode { ComputeMode::Primary => { info!("starting safekeepers syncing"); - let lsn = self - .sync_safekeepers(pspec.storage_auth_token.clone()) - .with_context(|| "failed to sync safekeepers")?; + let lsn = if let Some(synced_lsn) = spec.skip_sync_safekeepers { + info!("no need to sync"); + synced_lsn + } else { + self.sync_safekeepers(pspec.storage_auth_token.clone()) + .with_context(|| "failed to sync safekeepers")? + }; info!("safekeepers synced at LSN {}", lsn); lsn } diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 52683ff1c3..12b456f39c 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -68,6 +68,7 @@ pub struct EndpointConf { http_port: u16, pg_version: u32, skip_pg_catalog_updates: bool, + skip_sync_safekeepers: Option, } // @@ -137,6 +138,7 @@ impl ComputeControlPlane { tenant_id, pg_version, skip_pg_catalog_updates: false, + skip_sync_safekeepers: None, }); ep.create_endpoint_dir()?; @@ -151,6 +153,7 @@ impl ComputeControlPlane { pg_port, pg_version, skip_pg_catalog_updates: false, + skip_sync_safekeepers: None, })?, )?; std::fs::write( @@ -189,6 +192,7 @@ pub struct Endpoint { // Optimizations skip_pg_catalog_updates: bool, + skip_sync_safekeepers: Option, } impl Endpoint { @@ -223,6 +227,7 @@ impl Endpoint { tenant_id: conf.tenant_id, pg_version: conf.pg_version, skip_pg_catalog_updates: conf.skip_pg_catalog_updates, + skip_sync_safekeepers: conf.skip_sync_safekeepers, }) } @@ -457,6 +462,7 @@ impl Endpoint { // Create spec file let spec = ComputeSpec { + skip_sync_safekeepers: self.skip_sync_safekeepers, skip_pg_catalog_updates: self.skip_pg_catalog_updates, format_version: 1.0, operation_uuid: None, diff --git a/libs/compute_api/src/spec.rs 
b/libs/compute_api/src/spec.rs index b3f0e9ba43..4d15f65aec 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -33,6 +33,15 @@ pub struct ComputeSpec { #[serde(default)] // Default false pub skip_pg_catalog_updates: bool, + /// An optional hint that can be passed to speed up startup time if we know + /// that safekeepers have already been synced at the given LSN. + /// + /// NOTE: If there's any possibility that the safekeepers could have advanced + /// (e.g. if we started compute, and it crashed) we should stay on the + /// safe side and provide None. + #[serde(default)] + pub skip_sync_safekeepers: Option, + // Information needed to connect to the storage layer. // // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed. diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index 8babbbe132..10d9fe9f53 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -30,7 +30,18 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch("test_startup") + tenant_id = env.initial_tenant + timeline_id = env.neon_cli.create_branch("test_startup") + + def get_synced_lsn(): + """Assert safekeepers are synced and get the LSN.""" + commit_lsns = [ + sk.http_client().timeline_status(tenant_id, timeline_id).commit_lsn + for sk in env.safekeepers + ] + assert len(commit_lsns) == 3 + assert len(set(commit_lsns)) == 1 + return commit_lsns[0] endpoint = None @@ -63,7 +74,8 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc endpoint.stop() # Imitate optimizations that console would do for the second start - endpoint.respec(skip_pg_catalog_updates=True) + lsn = get_synced_lsn() + endpoint.respec(skip_pg_catalog_updates=True, skip_sync_safekeepers=lsn.lsn_int) # This test sometimes runs for longer 
than the global 5 minute timeout.