From 12053cf83260da4da46f4039d5bbfb4cbb8e57da Mon Sep 17 00:00:00 2001
From: John Spray
Date: Mon, 13 Jan 2025 11:18:14 +0000
Subject: [PATCH] storage controller: improve consistency_check_api (#10363)

## Problem

Limitations found while using this to investigate https://github.com/neondatabase/neon/issues/10234:

- If we hit a node consistency issue, we drop out and don't check shards for consistency
- The messages printed after a shard consistency issue are huge, and grafana appears to drop them.

## Summary of changes

- Defer node consistency errors until the end of the function, so that we always proceed to check shards for consistency
- Print out smaller log lines that just point out the diffs between expected and persistent state
---
 storage_controller/src/service.rs | 59 ++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 265b2798d2..430d884548 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -5256,7 +5256,8 @@ impl Service {
         expect_nodes.sort_by_key(|n| n.node_id);
         nodes.sort_by_key(|n| n.node_id);
 
-        if nodes != expect_nodes {
+        // Errors relating to nodes are deferred so that we don't skip the shard checks below if we have a node error
+        let node_result = if nodes != expect_nodes {
             tracing::error!("Consistency check failed on nodes.");
             tracing::error!(
                 "Nodes in memory: {}",
                 serde_json::to_string(&expect_nodes)
                     .map_err(|e| ApiError::InternalServerError(e.into()))?
             );
@@ -5268,10 +5269,12 @@
                 "Nodes in database: {}",
                 serde_json::to_string(&nodes)
                     .map_err(|e| ApiError::InternalServerError(e.into()))?
             );
-            return Err(ApiError::InternalServerError(anyhow::anyhow!(
+            Err(ApiError::InternalServerError(anyhow::anyhow!(
                 "Node consistency failure"
-            )));
-        }
+            )))
+        } else {
+            Ok(())
+        };
 
         let mut persistent_shards = self.persistence.load_active_tenant_shards().await?;
         persistent_shards
@@ -5281,6 +5284,7 @@
         if persistent_shards != expect_shards {
             tracing::error!("Consistency check failed on shards.");
+
             tracing::error!(
                 "Shards in memory: {}",
                 serde_json::to_string(&expect_shards)
@@ -5291,12 +5295,57 @@
                 "Shards in database: {}",
                 serde_json::to_string(&persistent_shards)
                     .map_err(|e| ApiError::InternalServerError(e.into()))?
             );
+
+            // The total dump log lines above are useful in testing but in the field grafana will
+            // usually just drop them because they're so large. So we also do some explicit logging
+            // of just the diffs.
+            let persistent_shards = persistent_shards
+                .into_iter()
+                .map(|tsp| (tsp.get_tenant_shard_id().unwrap(), tsp))
+                .collect::<HashMap<_, _>>();
+            let expect_shards = expect_shards
+                .into_iter()
+                .map(|tsp| (tsp.get_tenant_shard_id().unwrap(), tsp))
+                .collect::<HashMap<_, _>>();
+            for (tenant_shard_id, persistent_tsp) in &persistent_shards {
+                match expect_shards.get(tenant_shard_id) {
+                    None => {
+                        tracing::error!(
+                            "Shard {} found in database but not in memory",
+                            tenant_shard_id
+                        );
+                    }
+                    Some(expect_tsp) => {
+                        if expect_tsp != persistent_tsp {
+                            tracing::error!(
+                                "Shard {} is inconsistent. In memory: {}, database has: {}",
+                                tenant_shard_id,
+                                serde_json::to_string(expect_tsp).unwrap(),
+                                serde_json::to_string(&persistent_tsp).unwrap()
+                            );
+                        }
+                    }
+                }
+            }
+
+            // Having already logged any differences, log any shards that simply aren't present in the database
+            for (tenant_shard_id, memory_tsp) in &expect_shards {
+                if !persistent_shards.contains_key(tenant_shard_id) {
+                    tracing::error!(
+                        "Shard {} found in memory but not in database: {}",
+                        tenant_shard_id,
+                        serde_json::to_string(memory_tsp)
+                            .map_err(|e| ApiError::InternalServerError(e.into()))?
+                    );
+                }
+            }
+
             return Err(ApiError::InternalServerError(anyhow::anyhow!(
                 "Shard consistency failure"
             )));
         }
 
-        Ok(())
+        node_result
     }
 
     /// For debug/support: a JSON dump of the [`Scheduler`]. Returns a response so that
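
As an aside, the two ideas in this change can be illustrated outside the patch itself. The following is a minimal standalone sketch, using hypothetical types and names rather than the storage controller's real API: (1) defer the first failure as a `Result` so later checks still run, and (2) when two collections disagree, index both by id and log only the per-entry differences instead of dumping both collections wholesale.

```rust
use std::collections::HashMap;

fn consistency_check_sketch(
    nodes_in_memory: &[(u64, String)],
    nodes_in_db: &[(u64, String)],
    shards_in_memory: &[(u64, String)],
    shards_in_db: &[(u64, String)],
) -> Result<(), String> {
    // Defer the node error instead of returning early, so the shard checks below still run.
    let node_result = if nodes_in_memory != nodes_in_db {
        eprintln!("node consistency check failed");
        Err("node consistency failure".to_string())
    } else {
        Ok(())
    };

    if shards_in_memory != shards_in_db {
        // Index both sides by shard id so individual diffs can be reported.
        let mem: HashMap<u64, String> = shards_in_memory.iter().cloned().collect();
        let db: HashMap<u64, String> = shards_in_db.iter().cloned().collect();

        for (id, db_state) in &db {
            match mem.get(id) {
                None => eprintln!("shard {id} found in database but not in memory"),
                Some(mem_state) if mem_state != db_state => {
                    eprintln!("shard {id} is inconsistent: memory={mem_state}, database={db_state}");
                }
                Some(_) => {}
            }
        }
        for id in mem.keys() {
            if !db.contains_key(id) {
                eprintln!("shard {id} found in memory but not in database");
            }
        }
        return Err("shard consistency failure".to_string());
    }

    // Only once every check has run is the deferred node error surfaced.
    node_result
}

fn main() {
    let mem_nodes = vec![(1, "node-a".to_string())];
    let db_nodes = vec![(1, "node-a".to_string())];
    let mem_shards = vec![(10, "attached".to_string())];
    let db_shards = vec![(10, "detached".to_string())];
    let outcome = consistency_check_sketch(&mem_nodes, &db_nodes, &mem_shards, &db_shards);
    eprintln!("overall result: {outcome:?}");
}
```

The point of deferring `node_result` rather than returning early is that a node-level mismatch no longer hides shard-level mismatches: both classes of inconsistency are logged in a single pass, and the first error is still returned to the caller at the end.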