From 0fd3cd27cb7ac66df5938bf219e9f12ce7b78c8a Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 9 Feb 2024 17:37:30 +0200 Subject: [PATCH] Tighten up the check for garbage after end-of-tar. Turn the warning into an error, if there is garbage after the end of imported tar file. However, it's normal for 'tar' to append extra empty blocks to the end, so tolerate those without warnings or errors. --- pageserver/src/page_service.rs | 17 ++++++++++++----- test_runner/regress/test_import.py | 10 +++------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 6fc38a76d4..7b660b5eca 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -91,8 +91,8 @@ const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000); /// `tokio_tar` already read the first such block. Read the second all-zeros block, /// and check that there is no more data after the EOF marker. /// -/// XXX: Currently, any trailing data after the EOF marker prints a warning. -/// Perhaps it should be a hard error? +/// 'tar' command can also write extra blocks of zeros, up to a record +/// size, controlled by the --record-size argument. Ignore them too. async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()> { use tokio::io::AsyncReadExt; let mut buf = [0u8; 512]; @@ -113,17 +113,24 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<() anyhow::bail!("invalid tar EOF marker"); } - // Drain any data after the EOF marker + // Drain any extra zero-blocks after the EOF marker let mut trailing_bytes = 0; + let mut seen_nonzero_bytes = false; loop { let nbytes = reader.read(&mut buf).await?; trailing_bytes += nbytes; + if !buf.iter().all(|&x| x == 0) { + seen_nonzero_bytes = true; + } if nbytes == 0 { break; } } - if trailing_bytes > 0 { - warn!("ignored {trailing_bytes} unexpected bytes after the tar archive"); + if seen_nonzero_bytes { + anyhow::bail!("unexpected non-zero bytes after the tar archive"); + } + if trailing_bytes % 512 != 0 { + anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"); } Ok(()) } diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 3519cbbaab..7942f5cc9b 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -95,7 +95,6 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build ".*InternalServerError.*Tenant .* not found.*", ".*InternalServerError.*Timeline .* not found.*", ".*InternalServerError.*Cannot delete timeline which has child timelines.*", - ".*ignored .* unexpected bytes after the tar archive.*", ] ) @@ -142,12 +141,9 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build with pytest.raises(RuntimeError): import_tar(corrupt_base_tar, wal_tar) - # A tar with trailing garbage is currently accepted. It prints a warnings - # to the pageserver log, however. Check that. - import_tar(base_plus_garbage_tar, wal_tar) - assert env.pageserver.log_contains( - ".*WARN.*ignored .* unexpected bytes after the tar archive.*" - ) + # Importing a tar with trailing garbage fails + with pytest.raises(RuntimeError): + import_tar(base_plus_garbage_tar, wal_tar) client = env.pageserver.http_client() timeline_delete_wait_completed(client, tenant, timeline)