Remove non-gzipped basebackup code path

In July of 2023, Bojan and Chi authored 92aee7e07f. Our in production pageservers are most definitely at a version where they all support gzipped basebackups.
2026-01-05 20:42:54 +00:00 · 2024-10-29 23:03:45 -05:00
parent 8e2e9f0fed
commit 0c828c57e2
1 changed files with 4 additions and 28 deletions
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,7 +1,6 @@
 use std::collections::HashMap;
 use std::env;
 use std::fs;
-use std::io::BufRead;
 use std::os::unix::fs::{symlink, PermissionsExt};
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -365,8 +364,7 @@ impl ComputeNode {
        let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;

        let basebackup_cmd = match lsn {
-            // HACK We don't use compression on first start (Lsn(0)) because there's no API for it
-            Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id),
+            Lsn(0) => format!("basebackup {} {} --gzip", spec.tenant_id, spec.timeline_id),
            _ => format!(
                "basebackup {} {} {} --gzip",
                spec.tenant_id, spec.timeline_id, lsn
@@ -375,38 +373,16 @@ impl ComputeNode {

        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
        let mut measured_reader = MeasuredReader::new(copyreader);
-
-        // Check the magic number to see if it's a gzip or not. Even though
-        // we might explicitly ask for gzip, an old pageserver with no implementation
-        // of gzip compression might send us uncompressed data. After some time
-        // passes we can assume all pageservers know how to compress and we can
-        // delete this check.
-        //
-        // If the data is not gzip, it will be tar. It will not be mistakenly
-        // recognized as gzip because tar starts with an ascii encoding of a filename,
-        // and 0x1f and 0x8b are unlikely first characters for any filename. Moreover,
-        // we send the "global" directory first from the pageserver, so it definitely
-        // won't be recognized as gzip.
        let mut bufreader = std::io::BufReader::new(&mut measured_reader);
-        let gzip = {
-            let peek = bufreader.fill_buf().unwrap();
-            peek[0] == 0x1f && peek[1] == 0x8b
-        };

        // Read the archive directly from the `CopyOutReader`
        //
        // Set `ignore_zeros` so that unpack() reads all the Copy data and
        // doesn't stop at the end-of-archive marker. Otherwise, if the server
        // sends an Error after finishing the tarball, we will not notice it.
-        if gzip {
-            let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
-            ar.set_ignore_zeros(true);
-            ar.unpack(&self.pgdata)?;
-        } else {
-            let mut ar = tar::Archive::new(&mut bufreader);
-            ar.set_ignore_zeros(true);
-            ar.unpack(&self.pgdata)?;
-        };
+        let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
+        ar.set_ignore_zeros(true);
+        ar.unpack(&self.pgdata)?;

        // Report metrics
        let mut state = self.state.lock().unwrap();