diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 6fc38a76d4..7b660b5eca 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -91,8 +91,8 @@ const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000); /// `tokio_tar` already read the first such block. Read the second all-zeros block, /// and check that there is no more data after the EOF marker. /// -/// XXX: Currently, any trailing data after the EOF marker prints a warning. -/// Perhaps it should be a hard error? +/// 'tar' command can also write extra blocks of zeros, up to a record +/// size, controlled by the --record-size argument. Ignore them too. async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()> { use tokio::io::AsyncReadExt; let mut buf = [0u8; 512]; @@ -113,17 +113,24 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<() anyhow::bail!("invalid tar EOF marker"); } - // Drain any data after the EOF marker + // Drain any extra zero-blocks after the EOF marker let mut trailing_bytes = 0; + let mut seen_nonzero_bytes = false; loop { let nbytes = reader.read(&mut buf).await?; trailing_bytes += nbytes; + if !buf.iter().all(|&x| x == 0) { + seen_nonzero_bytes = true; + } if nbytes == 0 { break; } } - if trailing_bytes > 0 { - warn!("ignored {trailing_bytes} unexpected bytes after the tar archive"); + if seen_nonzero_bytes { + anyhow::bail!("unexpected non-zero bytes after the tar archive"); + } + if trailing_bytes % 512 != 0 { + anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"); } Ok(()) } diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 3519cbbaab..7942f5cc9b 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -95,7 +95,6 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build ".*InternalServerError.*Tenant .* not found.*", ".*InternalServerError.*Timeline .* not found.*", ".*InternalServerError.*Cannot delete timeline which has child timelines.*", - ".*ignored .* unexpected bytes after the tar archive.*", ] ) @@ -142,12 +141,9 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build with pytest.raises(RuntimeError): import_tar(corrupt_base_tar, wal_tar) - # A tar with trailing garbage is currently accepted. It prints a warnings - # to the pageserver log, however. Check that. - import_tar(base_plus_garbage_tar, wal_tar) - assert env.pageserver.log_contains( - ".*WARN.*ignored .* unexpected bytes after the tar archive.*" - ) + # Importing a tar with trailing garbage fails + with pytest.raises(RuntimeError): + import_tar(base_plus_garbage_tar, wal_tar) client = env.pageserver.http_client() timeline_delete_wait_completed(client, tenant, timeline)