From 12b39c9db95ec52353ab2bb3e21bc4a12306ce2b Mon Sep 17 00:00:00 2001
From: John Spray
Date: Sat, 10 Feb 2024 11:56:52 +0000
Subject: [PATCH] control_plane: add debug APIs for force-dropping tenant/node
 (#6702)

## Problem

When debugging/supporting this service, we sometimes need it to simply forget
about a tenant or node, e.g. because of an issue with cleanly tearing them
down. For example, if I create a tenant with a PlacementPolicy that can't be
satisfied by the nodes we have, the tenant can never be scheduled, so a normal
DELETE can never complete.

## Summary of changes

- Add APIs for dropping nodes and tenants that perform no teardown beyond
  removing the entity from the DB and removing any in-memory references to it.
---
 control_plane/attachment_service/src/http.rs  | 19 +++++++++
 .../attachment_service/src/persistence.rs     | 13 ++++++-
 .../attachment_service/src/service.rs         | 39 +++++++++++++++++++
 .../attachment_service/src/tenant_state.rs    | 14 +++++++
 test_runner/regress/test_sharding_service.py  | 24 ++++++++++++
 5 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/control_plane/attachment_service/src/http.rs b/control_plane/attachment_service/src/http.rs
index 8501e4980f..38785d3a98 100644
--- a/control_plane/attachment_service/src/http.rs
+++ b/control_plane/attachment_service/src/http.rs
@@ -280,6 +280,12 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError>
     json_response(StatusCode::OK, state.service.node_list().await?)
 }
 
+async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let state = get_state(&req);
+    let node_id: NodeId = parse_request_param(&req, "node_id")?;
+    json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
+}
+
 async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     let config_req = json_request::<NodeConfigureRequest>(&mut req).await?;
@@ -320,6 +326,13 @@ async fn handle_tenant_shard_migrate(
     )
 }
 
+async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let state = get_state(&req);
+
+    json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
+}
+
 /// Status endpoint is just used for checking that our HTTP listener is up
 async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
     json_response(StatusCode::OK, ())
@@ -402,6 +415,12 @@ pub fn make_router(
             request_span(r, handle_attach_hook)
         })
         .post("/debug/v1/inspect", |r| request_span(r, handle_inspect))
+        .post("/debug/v1/tenant/:tenant_id/drop", |r| {
+            request_span(r, handle_tenant_drop)
+        })
+        .post("/debug/v1/node/:node_id/drop", |r| {
+            request_span(r, handle_node_drop)
+        })
         .get("/control/v1/tenant/:tenant_id/locate", |r| {
             tenant_service_handler(r, handle_tenant_locate)
         })
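The routes above are the whole user-facing surface of this change. For reference, a minimal sketch of exercising them by hand against a running service; the base URL, node ID, and tenant ID below are illustrative placeholders, not values from this patch:

```python
# Sketch only: hit the new debug endpoints with plain HTTP. Assumes the
# attachment service listens on localhost:1234 (substitute the real address).
import requests

base = "http://localhost:1234"

# Forget a node: deletes its DB row and strips in-memory references, nothing else.
requests.post(f"{base}/debug/v1/node/1/drop").raise_for_status()

# Forget a tenant: no detach or delete is ever issued to pageservers.
requests.post(f"{base}/debug/v1/tenant/3fa85f6457174562b3fc2c963f66afa6/drop").raise_for_status()
```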
diff --git a/control_plane/attachment_service/src/persistence.rs b/control_plane/attachment_service/src/persistence.rs
index 623d625767..457dc43232 100644
--- a/control_plane/attachment_service/src/persistence.rs
+++ b/control_plane/attachment_service/src/persistence.rs
@@ -260,7 +260,6 @@ impl Persistence {
 
     /// Ordering: call this _after_ deleting the tenant on pageservers, but _before_ dropping state for
     /// the tenant from memory on this server.
-    #[allow(unused)]
     pub(crate) async fn delete_tenant(&self, del_tenant_id: TenantId) -> DatabaseResult<()> {
         use crate::schema::tenant_shards::dsl::*;
         self.with_conn(move |conn| -> DatabaseResult<()> {
@@ -273,6 +272,18 @@ impl Persistence {
         .await
     }
 
+    pub(crate) async fn delete_node(&self, del_node_id: NodeId) -> DatabaseResult<()> {
+        use crate::schema::nodes::dsl::*;
+        self.with_conn(move |conn| -> DatabaseResult<()> {
+            diesel::delete(nodes)
+                .filter(node_id.eq(del_node_id.0 as i64))
+                .execute(conn)?;
+
+            Ok(())
+        })
+        .await
+    }
+
     /// When a tenant invokes the /re-attach API, this function is responsible for doing an efficient
     /// batched increment of the generations of all tenants whose generation_pageserver is equal to
     /// the node that called /re-attach.
diff --git a/control_plane/attachment_service/src/service.rs b/control_plane/attachment_service/src/service.rs
index 0331087e0d..95efa8ecd7 100644
--- a/control_plane/attachment_service/src/service.rs
+++ b/control_plane/attachment_service/src/service.rs
@@ -1804,6 +1804,45 @@ impl Service {
         Ok(TenantShardMigrateResponse {})
     }
 
+    /// This is for debug/support only: we simply drop all state for a tenant, without
+    /// detaching or deleting it on pageservers.
+    pub(crate) async fn tenant_drop(&self, tenant_id: TenantId) -> Result<(), ApiError> {
+        self.persistence.delete_tenant(tenant_id).await?;
+
+        let mut locked = self.inner.write().unwrap();
+        let mut shards = Vec::new();
+        for (tenant_shard_id, _) in locked.tenants.range(TenantShardId::tenant_range(tenant_id)) {
+            shards.push(*tenant_shard_id);
+        }
+
+        for shard in shards {
+            locked.tenants.remove(&shard);
+        }
+
+        Ok(())
+    }
+
+    /// This is for debug/support only: we simply drop all state for a node, without
+    /// detaching or deleting anything on pageservers. We do not try to re-schedule any
+    /// tenants that were on this node.
+    ///
+    /// TODO: proper node deletion API that unhooks things more gracefully
+    pub(crate) async fn node_drop(&self, node_id: NodeId) -> Result<(), ApiError> {
+        self.persistence.delete_node(node_id).await?;
+
+        let mut locked = self.inner.write().unwrap();
+
+        for shard in locked.tenants.values_mut() {
+            shard.deref_node(node_id);
+        }
+
+        let mut nodes = (*locked.nodes).clone();
+        nodes.remove(&node_id);
+        locked.nodes = Arc::new(nodes);
+
+        Ok(())
+    }
+
     pub(crate) async fn node_list(&self) -> Result<Vec<NodePersistence>, ApiError> {
         // It is convenient to avoid taking the big lock and converting Node to a serializable
         // structure, by fetching from storage instead of reading in-memory state.
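One implementation note on `tenant_drop` above: shard IDs are collected into a `Vec` before removal because the range iterator borrows `locked.tenants`, so entries cannot be removed mid-iteration. A toy model of the same collect-then-remove pattern (the dict layout is illustrative, not the service's real types):

```python
# Toy model of tenant_drop's in-memory cleanup: all shards of a tenant form a
# contiguous key range, and the map must not be mutated while iterating it.
tenants = {
    ("tenant-a", 0): "shard state",
    ("tenant-a", 1): "shard state",
    ("tenant-b", 0): "shard state",
}

doomed = [key for key in tenants if key[0] == "tenant-a"]  # collect first
for key in doomed:  # then remove
    del tenants[key]

assert list(tenants) == [("tenant-b", 0)]
```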
diff --git a/control_plane/attachment_service/src/tenant_state.rs b/control_plane/attachment_service/src/tenant_state.rs
index c0ab076a55..1646ed9fcd 100644
--- a/control_plane/attachment_service/src/tenant_state.rs
+++ b/control_plane/attachment_service/src/tenant_state.rs
@@ -534,4 +534,18 @@ impl TenantState {
             seq: self.sequence,
         })
     }
+
+    // If we had any state at all referring to this node ID, drop it. Does not
+    // attempt to reschedule.
+    pub(crate) fn deref_node(&mut self, node_id: NodeId) {
+        if self.intent.attached == Some(node_id) {
+            self.intent.attached = None;
+        }
+
+        self.intent.secondary.retain(|n| n != &node_id);
+
+        self.observed.locations.remove(&node_id);
+
+        debug_assert!(!self.intent.all_pageservers().contains(&node_id));
+    }
 }
diff --git a/test_runner/regress/test_sharding_service.py b/test_runner/regress/test_sharding_service.py
index babb0d261c..248d992851 100644
--- a/test_runner/regress/test_sharding_service.py
+++ b/test_runner/regress/test_sharding_service.py
@@ -387,3 +387,27 @@ def test_sharding_service_compute_hook(
         assert notifications[1] == expect
 
     wait_until(10, 1, received_restart_notification)
+
+
+def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
+    """
+    Verify that occasional-use debug APIs work as expected. This is a lightweight test
+    that just hits the endpoints to check that they don't bitrot.
+    """
+
+    neon_env_builder.num_pageservers = 2
+    env = neon_env_builder.init_start()
+
+    tenant_id = TenantId.generate()
+    env.attachment_service.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
+
+    # These APIs are intentionally not implemented as methods on NeonAttachmentService, as
+    # they're just for use in unanticipated circumstances.
+    env.attachment_service.request(
+        "POST", f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop"
+    )
+    assert len(env.attachment_service.node_list()) == 1
+
+    env.attachment_service.request(
+        "POST", f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop"
+    )
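As a footnote to `deref_node`, a toy model of the invariant it enforces per shard: after the call, neither intent nor observed state references the dropped node, and nothing is rescheduled. Field names mirror the Rust, but the dict-shaped state is illustrative:

```python
# Toy model of TenantState::deref_node, assuming a dict-shaped shard state.
def deref_node(shard: dict, node_id: int) -> None:
    if shard["intent_attached"] == node_id:
        shard["intent_attached"] = None
    shard["intent_secondary"] = [n for n in shard["intent_secondary"] if n != node_id]
    shard["observed_locations"].pop(node_id, None)
    # Mirrors the Rust debug_assert!: no intent reference to node_id survives.
    assert shard["intent_attached"] != node_id
    assert node_id not in shard["intent_secondary"]

shard = {"intent_attached": 2, "intent_secondary": [1], "observed_locations": {1: "x", 2: "x"}}
deref_node(shard, 2)
assert shard == {"intent_attached": None, "intent_secondary": [1], "observed_locations": {1: "x"}}
```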