diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index e4e906dbdb..88b7ff5144 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -8890,10 +8890,11 @@ impl Service {
         // to not stall the operation when a cold secondary is encountered.
         const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(30);
         const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
-        let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
-            .secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
-            .secondary_download_request_timeout(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT)
-            .build();
+        let reconciler_config: ReconcilerConfig =
+            ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
+                .secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
+                .secondary_download_request_timeout(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT)
+                .build();
 
         let mut waiters = Vec::new();
 
@@ -9072,10 +9073,10 @@ impl Service {
 
     async fn drain_secondary_attachments(
         self: &Arc<Self>,
-        node_id: NodeId,
-        cancel: CancellationToken,
+        _node_id: NodeId,
+        _cancel: CancellationToken,
     ) -> Result<(), OperationError> {
-        unimplemented!();
+        Ok(())
     }
 
     /// Create a node fill plan (pick secondaries to promote), based on:
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 5223e34baf..05fc5edae2 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2062,11 +2062,16 @@ class NeonStorageController(MetricsGetter, LogUtils):
             headers=self.headers(TokenScope.ADMIN),
         )
 
-    def node_drain(self, node_id):
-        log.info(f"node_drain({node_id})")
+    def node_drain(self, node_id: int, drain_all: bool | None = None):
+        log.info(f"node_drain({node_id}, drain_all={drain_all})")
+
+        url = f"{self.api}/control/v1/node/{node_id}/drain"
+        if drain_all is not None:
+            url += f"?drain_all={str(drain_all).lower()}"
+
         self.request(
             "PUT",
-            f"{self.api}/control/v1/node/{node_id}/drain",
+            url,
             headers=self.headers(TokenScope.INFRA),
         )
 
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 8f3aa010e3..c7d412ba3d 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3093,6 +3093,70 @@ def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvB
     wait_until(reconfigure_node_again)
 
 
+def test_drain_with_secondary_locations(neon_env_builder: NeonEnvBuilder):
+    neon_env_builder.num_pageservers = 4
+
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    def get_pageserver_tenant_shards(node_id):
+        ps = env.get_pageserver(node_id)
+        locations = ps.http_client().tenant_list_locations()["tenant_shards"]
+        ret = []
+        for loc in locations:
+            ret.append(
+                {
+                    "tenant_shard_id": TenantShardId.parse(loc[0]),
+                    "mode": loc[1]["mode"],
+                }
+            )
+        return ret
+
+    def log_pageservers_state():
+        for ps in env.pageservers:
+            for tenant_shard in get_pageserver_tenant_shards(ps.id):
+                tenant_shard_id = tenant_shard["tenant_shard_id"]
+                mode = tenant_shard["mode"]
+                log.info(f"[PS {ps.id}] Seen {tenant_shard_id} in mode {mode}")
+
+    tenants = {}  # id → shard_count
+    for shard_count in [1, 2, 4, 8]:
+        id, _ = env.create_tenant(shard_count=shard_count, placement_policy='{"Attached": 1}')
+        tenants[id] = shard_count
+
+    log.info("Pageservers before reconciliation:")
+    log_pageservers_state()
+
+    env.storage_controller.reconcile_until_idle()
+
+    log.info("Pageservers before drain:")
+    log_pageservers_state()
+
+    node_id = env.pageservers[0].id
+
+    env.storage_controller.warm_up_all_secondaries()
+    env.storage_controller.retryable_node_operation(
+        lambda ps_id: env.storage_controller.node_drain(ps_id, drain_all=True),
+        node_id,
+        max_attempts=3,
+        backoff=2,
+    )
+
+    env.storage_controller.poll_node_status(
+        node_id,
+        PageserverAvailability.ACTIVE,
+        PageserverSchedulingPolicy.PAUSE_FOR_RESTART,
+        max_attempts=6,
+        backoff=5,
+    )
+
+    log.info("Pageservers after drain:")
+    log_pageservers_state()
+
+    shards = get_pageserver_tenant_shards(node_id)
+    assert shards == []
+
+
 def test_ps_unavailable_after_delete(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.num_pageservers = 3
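
Reviewer note: the fixture change reduces to an optional `drain_all` query parameter on the existing drain endpoint, and the Rust side adds a (for now stubbed-out) `drain_secondary_attachments`, which suggests the flag is meant to move secondary locations off the node in addition to attached ones. Below is a minimal sketch of the equivalent raw request for trying the flag by hand, assuming a hypothetical local controller address and infra-scoped JWT (neither value is part of this diff):

    import requests

    # Hypothetical values for illustration only; substitute your deployment's
    # controller address, node id, and infra-scoped token.
    api = "http://127.0.0.1:1234"
    token = "<infra-scoped JWT>"
    node_id = 1

    # PUT /control/v1/node/{node_id}/drain?drain_all=true, mirroring what
    # NeonStorageController.node_drain(node_id, drain_all=True) sends.
    resp = requests.put(
        f"{api}/control/v1/node/{node_id}/drain",
        params={"drain_all": "true"},
        headers={"Authorization": f"Bearer {token}"},
    )
    resp.raise_for_status()

Omitting the parameter preserves the old behaviour: the fixture only appends `?drain_all=...` when a value is passed explicitly, so existing callers of node_drain are unaffected.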