From dfb0a6fdaf8eecbfacf5b1d9a5dc62ec26bd64c9 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 12 Dec 2023 16:53:08 +0000
Subject: [PATCH] scrubber: handle initdb files, fix an issue with prefixes
 (#6079)

- The code for calculating the prefix in the bucket expected a trailing
  slash (as the tests supply), but that is an awkward expectation to
  impose in the field: make the code more flexible by trimming the
  trailing character only if it really is a slash.
- initdb archives were detected by the scrubber as malformed layer
  files. Teach it to recognize and ignore them.
---
 s3_scrubber/src/checks.rs        | 22 +++++++++++++++++++++-
 s3_scrubber/src/lib.rs           |  4 +++-
 s3_scrubber/src/main.rs          | 13 ++++++++++++-
 s3_scrubber/src/scan_metadata.rs |  4 ++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs
index 510a128663..a15a908212 100644
--- a/s3_scrubber/src/checks.rs
+++ b/s3_scrubber/src/checks.rs
@@ -142,7 +142,9 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                 .collect();
 
             if !orphan_layers.is_empty() {
-                result.errors.push(format!(
+                // An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
+                // these as a hint that there is something worth cleaning up here.
+                result.warnings.push(format!(
                     "index_part.json does not contain layers from S3: {:?}",
                     orphan_layers
                         .iter()
@@ -170,6 +172,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                 ));
             }
         }
+        BlobDataParseResult::Relic => {}
         BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
             parse_errors
                 .into_iter()
@@ -215,6 +218,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
 
@@ -245,6 +250,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();
 
     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;
 
     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +264,10 @@
                 tracing::info!("Index key {key}");
                 index_parts.push(obj)
             }
+            Some("initdb.tar.zst") => {
+                tracing::info!("initdb archive {key}");
+                initdb_archive = true;
+            }
             Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
                 Ok((new_layer, gen)) => {
                     tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +289,16 @@
        }
     }
 
+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
diff --git a/s3_scrubber/src/lib.rs b/s3_scrubber/src/lib.rs
index e5465952fb..6607db21e6 100644
--- a/s3_scrubber/src/lib.rs
+++ b/s3_scrubber/src/lib.rs
@@ -86,7 +86,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
diff --git a/s3_scrubber/src/main.rs b/s3_scrubber/src/main.rs
index 1f0ceebdaf..ef020edc2a 100644
--- a/s3_scrubber/src/main.rs
+++ b/s3_scrubber/src/main.rs
@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
     ));
 
     match cli.command {
-        Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
+        Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
             Err(e) => {
                 tracing::error!("Failed: {e}");
                 Err(e)
@@ -70,6 +70,17 @@
                 }
                 if summary.is_fatal() {
                     Err(anyhow::anyhow!("Fatal scrub errors detected"))
+                } else if summary.is_empty() {
+                    // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
+                    // scrubber they were probably expecting to scan something; seeing no timelines
+                    // at all most likely indicates a configuration issue such as a bad prefix.
+                    Err(anyhow::anyhow!(
+                        "No timelines found in bucket {} prefix {}",
+                        bucket_config.bucket,
+                        bucket_config
+                            .prefix_in_bucket
+                            .unwrap_or("".to_string())
+                    ))
                 } else {
                     Ok(())
                 }
diff --git a/s3_scrubber/src/scan_metadata.rs b/s3_scrubber/src/scan_metadata.rs
index ad82db1e76..228f8d6763 100644
--- a/s3_scrubber/src/scan_metadata.rs
+++ b/s3_scrubber/src/scan_metadata.rs
@@ -174,6 +174,10 @@ Timeline layer count: {6}
     pub fn is_fatal(&self) -> bool {
         !self.with_errors.is_empty()
     }
+
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
 }
 
 /// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
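
Not part of the patch above, but to make the prefix fix concrete: a minimal,
self-contained Rust sketch of the joining rule that the lib.rs hunk now
implements. `join_prefix` is a hypothetical stand-in for the real `S3Target`
method, the delimiter is assumed to be "/" (the real code uses the target's
configured delimiter), and the example prefixes are illustrative only:

    /// Append a path segment to a bucket prefix, popping the trailing
    /// character only when it really is a slash. The old code popped
    /// unconditionally, so a prefix supplied without a trailing slash
    /// lost its final character.
    fn join_prefix(prefix: &str, new_segment: &str) -> String {
        if prefix.is_empty() {
            return format!("/{}/", new_segment);
        }
        let mut prefix = prefix.to_string();
        if prefix.ends_with('/') {
            prefix.pop();
        }
        // Joining with a trailing empty element yields "prefix/segment/".
        [prefix.as_str(), new_segment, ""].join("/")
    }

    fn main() {
        // With or without a trailing slash, the result is now the same.
        assert_eq!(join_prefix("pageserver/v1/", "tenants"), "pageserver/v1/tenants/");
        assert_eq!(join_prefix("pageserver/v1", "tenants"), "pageserver/v1/tenants/");
    }

Under the old behavior the second call would have produced
"pageserver/v/tenants/", silently dropping the last character of the prefix.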
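
Similarly illustrative (simplified counts and bools in place of the scrubber's
real types): the checks.rs change boils down to one extra classification when
listing a timeline's objects, sketched below.

    /// Outcome of scanning one timeline prefix, mirroring the patched
    /// list_timeline_blobs: a prefix holding nothing but initdb.tar.zst
    /// is the expected remnant of a deleted timeline, not a corrupt one.
    #[derive(Debug, PartialEq)]
    enum Outcome {
        Relic,
        NeedsValidation,
    }

    fn classify(index_parts: usize, layers: usize, initdb_archive: bool) -> Outcome {
        if index_parts == 0 && layers == 0 && initdb_archive {
            Outcome::Relic
        } else {
            Outcome::NeedsValidation
        }
    }

    fn main() {
        assert_eq!(classify(0, 0, true), Outcome::Relic); // post-deletion state
        assert_eq!(classify(1, 5, true), Outcome::NeedsValidation); // live timeline
    }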