From 149cbd1e0a9ce721653d583099e13e63d1ee0fe6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arpad=20M=C3=BCller?=
Date: Tue, 22 Apr 2025 23:27:01 +0200
Subject: [PATCH] Support single and two safekeeper scenarios (#11483)

In tests, and in small regions when one safekeeper is down, we need to
cope with only one or two safekeepers being available. Before,
`safekeepers_for_new_timeline` returned an error in that case. Now we
silently allow the timeline to be created on one or two safekeepers.

Part of #9011
---
 .../src/service/safekeeper_service.rs        | 49 ++++++++++++++--
 .../regress/test_storage_controller.py       | 57 +++++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/storage_controller/src/service/safekeeper_service.rs b/storage_controller/src/service/safekeeper_service.rs
index 8a13c6af23..088b3c4741 100644
--- a/storage_controller/src/service/safekeeper_service.rs
+++ b/storage_controller/src/service/safekeeper_service.rs
@@ -151,11 +151,39 @@ impl Service {
             "Got {} non-successful responses from initial creation request of total {total_result_count} responses",
             remaining.len()
         );
-        if remaining.len() >= 2 {
+        let target_sk_count = timeline_persistence.sk_set.len();
+        let quorum_size = match target_sk_count {
+            0 => {
+                return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                    "timeline configured without any safekeepers",
+                )));
+            }
+            1 | 2 => {
+                #[cfg(feature = "testing")]
+                {
+                    // In testing builds, one or two safekeepers are allowed
+                    target_sk_count
+                }
+                #[cfg(not(feature = "testing"))]
+                {
+                    // The region is misconfigured: we need at least three safekeepers
+                    // configured in order to schedule work to them
+                    tracing::warn!(
+                        "couldn't find at least 3 safekeepers for timeline, found: {:?}",
+                        timeline_persistence.sk_set
+                    );
+                    return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                        "couldn't find at least 3 safekeepers to put the timeline on"
+                    )));
+                }
+            }
+            _ => target_sk_count / 2 + 1,
+        };
+        let success_count = target_sk_count - remaining.len();
+        if success_count < quorum_size {
             // Failure
             return Err(ApiError::InternalServerError(anyhow::anyhow!(
-                "not enough successful reconciliations to reach quorum, please retry: {} errored",
-                remaining.len()
+                "not enough successful reconciliations to reach quorum size: {success_count} of {quorum_size}, total {target_sk_count}"
             )));
         }
 
@@ -492,8 +520,6 @@ impl Service {
     pub(crate) async fn safekeepers_for_new_timeline(
         &self,
     ) -> Result<Vec<SafekeeperInfo>, ApiError> {
-        // Number of safekeepers in different AZs we are looking for
-        let wanted_count = 3;
         let mut all_safekeepers = {
             let locked = self.inner.read().unwrap();
             locked
@@ -532,6 +558,19 @@
                 sk.1.id.0,
             )
         });
+        // Number of safekeepers in different AZs we are looking for
+        let wanted_count = match all_safekeepers.len() {
+            0 => {
+                return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                    "couldn't find any active safekeeper for new timeline",
+                )));
+            }
+            // Have laxer requirements in testing mode, as we don't want to
+            // spin up three safekeepers for every single test
+            #[cfg(feature = "testing")]
+            1 | 2 => all_safekeepers.len(),
+            _ => 3,
+        };
         let mut sks = Vec::new();
         let mut azs = HashSet::new();
         for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 44b30e289d..0f291030fe 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -4242,6 +4242,63 @@ def test_storcon_create_delete_sk_down(
     wait_until(timeline_deleted_on_sk)
 
 
+@run_only_on_default_postgres("PG version is not interesting here")
+@pytest.mark.parametrize("num_safekeepers", [1, 2, 3])
+@pytest.mark.parametrize("deletion_subject", [DeletionSubject.TENANT, DeletionSubject.TIMELINE])
+def test_storcon_few_sk(
+    neon_env_builder: NeonEnvBuilder,
+    num_safekeepers: int,
+    deletion_subject: DeletionSubject,
+):
+    """
+    Test that the storcon can create and delete tenants and timelines with a limited number of safekeepers.
+    - num_safekeepers: number of safekeepers to run with.
+    - deletion_subject: test that both single-timeline and whole-tenant deletion work.
+    """
+
+    neon_env_builder.num_safekeepers = num_safekeepers
+    safekeeper_list = list(range(1, num_safekeepers + 1))
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+    }
+    env = neon_env_builder.init_start()
+
+    tenant_id = TenantId.generate()
+    timeline_id = TimelineId.generate()
+    env.create_tenant(tenant_id, timeline_id)
+    child_timeline_id = env.create_branch("child_of_main", tenant_id)
+
+    env.safekeepers[0].assert_log_contains(f"creating new timeline {tenant_id}/{timeline_id}")
+
+    config_lines = [
+        "neon.safekeeper_proto_version = 3",
+    ]
+    with env.endpoints.create("main", tenant_id=tenant_id, config_lines=config_lines) as ep:
+        # endpoint should start.
+        ep.start(safekeeper_generation=1, safekeepers=safekeeper_list)
+        ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
+
+    with env.endpoints.create(
+        "child_of_main", tenant_id=tenant_id, config_lines=config_lines
+    ) as ep:
+        # endpoint should start.
+        ep.start(safekeeper_generation=1, safekeepers=safekeeper_list)
+        ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
+
+    if deletion_subject is DeletionSubject.TENANT:
+        env.storage_controller.pageserver_api().tenant_delete(tenant_id)
+    else:
+        env.storage_controller.pageserver_api().timeline_delete(tenant_id, child_timeline_id)
+
+    # ensure that the deletion log message shows up on the safekeeper
+    def timeline_deleted_on_sk():
+        env.safekeepers[0].assert_log_contains(
+            f"deleting timeline {tenant_id}/{child_timeline_id} from disk"
+        )
+
+    wait_until(timeline_deleted_on_sk)
+
+
 @pytest.mark.parametrize("wrong_az", [True, False])
 def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder, wrong_az: bool):
     """
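
As an aside for reviewers, the quorum rule introduced in safekeeper_service.rs can be
summarized by the standalone Rust sketch below. It is illustrative only and not part of
the patch; the helper name `quorum_size` and the boolean `testing` flag (standing in for
the cargo "testing" feature) are assumptions made for this example.

    /// Quorum size for a timeline placed on `target_sk_count` safekeepers.
    /// With one or two safekeepers (testing setups only) every safekeeper must
    /// acknowledge the creation; with three or more, a simple majority suffices.
    /// `None` means the configuration is rejected.
    fn quorum_size(target_sk_count: usize, testing: bool) -> Option<usize> {
        match target_sk_count {
            0 => None,                                 // no safekeepers configured at all
            1 | 2 if testing => Some(target_sk_count), // all of them must succeed
            1 | 2 => None,                             // production needs at least three
            _ => Some(target_sk_count / 2 + 1),        // majority quorum
        }
    }

    fn main() {
        assert_eq!(quorum_size(1, true), Some(1));  // single-safekeeper test setup
        assert_eq!(quorum_size(2, true), Some(2));  // both must succeed
        assert_eq!(quorum_size(3, false), Some(2)); // normal region: 2 of 3
        assert_eq!(quorum_size(5, false), Some(3)); // 3 of 5
        assert_eq!(quorum_size(2, false), None);    // rejected outside testing builds
    }

The same relaxation is applied in safekeepers_for_new_timeline: wanted_count drops from a
fixed 3 to the number of active safekeepers when fewer than three are available under the
testing feature.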