From e30f5fb922c44f47edddf4c0833f9a5fe95f33d0 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 5 Nov 2024 13:32:50 +0000 Subject: [PATCH] scrubber: remove AWS region assumption, tolerate negative max_project_size (#9636) ## Problem First issues noticed when trying to run scrubber find-garbage on Azure: - Azure staging contains projects with -1 set for max_project_size: apparently the control plane treats this as a signed field. - Scrubber code assumed that listing projects should filter to aws-$REGION. This is no longer needed (per comment in the code) because we now hit region-local APIs. This PR doesn't make it work all the way (`init_remote` still assumes S3), but these are necessary precursors. ## Summary of changes - Change max_project_size from unsigned to signed - Remove region filtering in favor of simply using the right region's API (which we already do) --- storage_scrubber/src/cloud_admin_api.rs | 6 +++--- storage_scrubber/src/garbage.rs | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/storage_scrubber/src/cloud_admin_api.rs b/storage_scrubber/src/cloud_admin_api.rs index 7b82a0b116..c9a62cd256 100644 --- a/storage_scrubber/src/cloud_admin_api.rs +++ b/storage_scrubber/src/cloud_admin_api.rs @@ -147,7 +147,7 @@ pub struct ProjectData { pub created_at: DateTime, pub updated_at: DateTime, pub pg_version: u32, - pub max_project_size: u64, + pub max_project_size: i64, pub remote_storage_size: u64, pub resident_size: u64, pub synthetic_storage_size: u64, @@ -261,7 +261,7 @@ impl CloudAdminApiClient { } } - pub async fn list_projects(&self, region_id: String) -> Result, Error> { + pub async fn list_projects(&self) -> Result, Error> { let _permit = self .request_limiter .acquire() @@ -318,7 +318,7 @@ impl CloudAdminApiClient { pagination_offset += response.data.len(); - result.extend(response.data.drain(..).filter(|t| t.region_id == region_id)); + result.append(&mut response.data); if pagination_offset >= response.total.unwrap_or(0) { 
break; diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index a0040ada08..863dbf960d 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -160,9 +160,7 @@ async fn find_garbage_inner( // Build a set of console-known tenants, for quickly eliminating known-active tenants without having // to issue O(N) console API requests. let console_projects: HashMap = cloud_admin_api_client - // FIXME: we can't just assume that all console's region ids are aws-. This hack - // will go away when we are talking to Control Plane APIs, which are per-region. - .list_projects(format!("aws-{}", bucket_config.region)) + .list_projects() .await? .into_iter() .map(|t| (t.tenant, t))