diff --git a/.dockerignore b/.dockerignore
index eead727994..c7a2f78e32 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -21,7 +21,7 @@
 !patches/
 !pgxn/
 !proxy/
-!s3_scrubber/
+!storage_scrubber/
 !safekeeper/
 !storage_broker/
 !storage_controller/
diff --git a/Cargo.lock b/Cargo.lock
index 66879fd743..1c8a8b0c0f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5109,54 +5109,6 @@ version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
 
-[[package]]
-name = "s3_scrubber"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "async-stream",
- "aws-config",
- "aws-sdk-s3",
- "aws-smithy-async",
- "bincode",
- "bytes",
- "camino",
- "chrono",
- "clap",
- "crc32c",
- "either",
- "futures",
- "futures-util",
- "hex",
- "histogram",
- "humantime",
- "itertools",
- "once_cell",
- "pageserver",
- "pageserver_api",
- "postgres_ffi",
- "rand 0.8.5",
- "remote_storage",
- "reqwest 0.12.4",
- "rustls 0.22.4",
- "rustls-native-certs 0.7.0",
- "serde",
- "serde_json",
- "serde_with",
- "thiserror",
- "tokio",
- "tokio-postgres",
- "tokio-postgres-rustls",
- "tokio-rustls 0.25.0",
- "tokio-stream",
- "tokio-util",
- "tracing",
- "tracing-appender",
- "tracing-subscriber",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "safekeeper"
 version = "0.1.0"
@@ -5813,6 +5765,54 @@ dependencies = [
  "workspace_hack",
 ]
 
+[[package]]
+name = "storage_scrubber"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-stream",
+ "aws-config",
+ "aws-sdk-s3",
+ "aws-smithy-async",
+ "bincode",
+ "bytes",
+ "camino",
+ "chrono",
+ "clap",
+ "crc32c",
+ "either",
+ "futures",
+ "futures-util",
+ "hex",
+ "histogram",
+ "humantime",
+ "itertools",
+ "once_cell",
+ "pageserver",
+ "pageserver_api",
+ "postgres_ffi",
+ "rand 0.8.5",
+ "remote_storage",
+ "reqwest 0.12.4",
+ "rustls 0.22.4",
+ "rustls-native-certs 0.7.0",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "thiserror",
+ "tokio",
+ "tokio-postgres",
+ "tokio-postgres-rustls",
+ "tokio-rustls 0.25.0",
+ "tokio-stream",
+ "tokio-util",
+ "tracing",
+ "tracing-appender",
+ "tracing-subscriber",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "storcon_cli"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 58715db32b..dc89c2341b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ members = [
     "safekeeper",
     "storage_broker",
     "storage_controller",
-    "s3_scrubber",
+    "storage_scrubber",
     "workspace_hack",
     "trace",
     "libs/compute_api",
diff --git a/s3_scrubber/Cargo.toml b/storage_scrubber/Cargo.toml
similarity index 98%
rename from s3_scrubber/Cargo.toml
rename to storage_scrubber/Cargo.toml
index 48b50ca21c..050be66483 100644
--- a/s3_scrubber/Cargo.toml
+++ b/storage_scrubber/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "s3_scrubber"
+name = "storage_scrubber"
 version = "0.1.0"
 edition.workspace = true
 license.workspace = true
diff --git a/s3_scrubber/README.md b/storage_scrubber/README.md
similarity index 99%
rename from s3_scrubber/README.md
rename to storage_scrubber/README.md
index 8a96542ada..0930f343ec 100644
--- a/s3_scrubber/README.md
+++ b/storage_scrubber/README.md
@@ -1,4 +1,4 @@
-# Neon S3 scrubber
+# Neon Storage Scrubber
 
 This tool directly accesses the S3 buckets used by the Neon `pageserver` and `safekeeper`, and does housekeeping such as cleaning up objects for tenants & timelines that no longer exist.
diff --git a/s3_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs
similarity index 100%
rename from s3_scrubber/src/checks.rs
rename to storage_scrubber/src/checks.rs
diff --git a/s3_scrubber/src/cloud_admin_api.rs b/storage_scrubber/src/cloud_admin_api.rs
similarity index 100%
rename from s3_scrubber/src/cloud_admin_api.rs
rename to storage_scrubber/src/cloud_admin_api.rs
diff --git a/s3_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs
similarity index 100%
rename from s3_scrubber/src/garbage.rs
rename to storage_scrubber/src/garbage.rs
diff --git a/s3_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs
similarity index 100%
rename from s3_scrubber/src/lib.rs
rename to storage_scrubber/src/lib.rs
diff --git a/s3_scrubber/src/main.rs b/storage_scrubber/src/main.rs
similarity index 96%
rename from s3_scrubber/src/main.rs
rename to storage_scrubber/src/main.rs
index ade8ef7d7a..222bd10ed2 100644
--- a/s3_scrubber/src/main.rs
+++ b/storage_scrubber/src/main.rs
@@ -1,11 +1,11 @@
 use anyhow::bail;
 use camino::Utf8PathBuf;
 use pageserver_api::shard::TenantShardId;
-use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
-use s3_scrubber::pageserver_physical_gc::GcMode;
-use s3_scrubber::scan_pageserver_metadata::scan_metadata;
-use s3_scrubber::tenant_snapshot::SnapshotDownloader;
-use s3_scrubber::{
+use storage_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
+use storage_scrubber::pageserver_physical_gc::GcMode;
+use storage_scrubber::scan_pageserver_metadata::scan_metadata;
+use storage_scrubber::tenant_snapshot::SnapshotDownloader;
+use storage_scrubber::{
     init_logging, pageserver_physical_gc::pageserver_physical_gc,
     scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig, NodeKind,
     TraversingDepth,
diff --git a/s3_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs
similarity index 100%
rename from s3_scrubber/src/metadata_stream.rs
rename to storage_scrubber/src/metadata_stream.rs
diff --git a/s3_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs
similarity index 100%
rename from s3_scrubber/src/pageserver_physical_gc.rs
rename to storage_scrubber/src/pageserver_physical_gc.rs
diff --git a/s3_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs
similarity index 100%
rename from s3_scrubber/src/scan_pageserver_metadata.rs
rename to storage_scrubber/src/scan_pageserver_metadata.rs
diff --git a/s3_scrubber/src/scan_safekeeper_metadata.rs b/storage_scrubber/src/scan_safekeeper_metadata.rs
similarity index 100%
rename from s3_scrubber/src/scan_safekeeper_metadata.rs
rename to storage_scrubber/src/scan_safekeeper_metadata.rs
diff --git a/s3_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs
similarity index 100%
rename from s3_scrubber/src/tenant_snapshot.rs
rename to storage_scrubber/src/tenant_snapshot.rs
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 6fdad2188c..394f5283f3 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -833,7 +833,7 @@ class NeonEnvBuilder:
     def enable_scrub_on_exit(self):
         """
         Call this if you would like the fixture to automatically run
-        s3_scrubber at the end of the test, as a bidirectional test
+        storage_scrubber at the end of the test, as a bidirectional test
         that the scrubber is working properly, and that the code within
         the test didn't produce any invalid remote state.
""" @@ -948,7 +948,7 @@ class NeonEnvBuilder: if self.scrub_on_exit: try: - S3Scrubber(self).scan_metadata() + StorageScrubber(self).scan_metadata() except Exception as e: log.error(f"Error during remote storage scrub: {e}") cleanup_error = e @@ -3937,7 +3937,7 @@ class Safekeeper(LogUtils): wait_until(20, 0.5, paused) -class S3Scrubber: +class StorageScrubber: def __init__(self, env: NeonEnvBuilder, log_dir: Optional[Path] = None): self.env = env self.log_dir = log_dir or env.test_output_dir @@ -3957,7 +3957,7 @@ class S3Scrubber: if s3_storage.endpoint is not None: env.update({"AWS_ENDPOINT_URL": s3_storage.endpoint}) - base_args = [str(self.env.neon_binpath / "s3_scrubber")] + base_args = [str(self.env.neon_binpath / "storage_scrubber")] args = base_args + args (output_path, stdout, status_code) = subprocess_capture( diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 0235cf6d20..696af24e5c 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -22,7 +22,7 @@ from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, PgBin, - S3Scrubber, + StorageScrubber, generate_uploads_and_deletions, ) from fixtures.pageserver.common_types import parse_layer_file_name @@ -215,7 +215,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder): # Having written a mixture of generation-aware and legacy index_part.json, # ensure the scrubber handles the situation as expected. - metadata_summary = S3Scrubber(neon_env_builder).scan_metadata() + metadata_summary = StorageScrubber(neon_env_builder).scan_metadata() assert metadata_summary["tenant_count"] == 1 # Scrubber should have seen our timeline assert metadata_summary["timeline_count"] == 1 assert metadata_summary["timeline_shard_count"] == 1 diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 757ea60882..2782d33e15 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Optional import pytest from fixtures.common_types import TenantId, TimelineId from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, S3Scrubber +from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, StorageScrubber from fixtures.pageserver.common_types import parse_layer_file_name from fixtures.pageserver.utils import ( assert_prefix_empty, @@ -214,7 +214,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): # Having done a bunch of attach/detach cycles, we will have generated some index garbage: check # that the scrubber sees it and cleans it up. We do this before the final attach+validate pass, # to also validate that the scrubber isn't breaking anything. 
-    gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
+    gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
     assert gc_summary["remote_storage_errors"] == 0
     assert gc_summary["indices_deleted"] > 0
 
@@ -536,7 +536,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
     # Scrub the remote storage
     # ========================
     # This confirms that the scrubber isn't upset by the presence of the heatmap
-    S3Scrubber(neon_env_builder).scan_metadata()
+    StorageScrubber(neon_env_builder).scan_metadata()
 
     # Detach secondary and delete tenant
     # ===================================
diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py
index 1996e99557..56075c5975 100644
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -11,8 +11,8 @@ from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnv,
     NeonEnvBuilder,
-    S3Scrubber,
     StorageControllerApiException,
+    StorageScrubber,
     last_flush_lsn_upload,
     tenant_get_shards,
     wait_for_last_flush_lsn,
@@ -128,7 +128,7 @@ def test_sharding_smoke(
 
     # Check the scrubber isn't confused by sharded content, then disable
     # it during teardown because we'll have deleted by then
-    S3Scrubber(neon_env_builder).scan_metadata()
+    StorageScrubber(neon_env_builder).scan_metadata()
     neon_env_builder.scrub_on_exit = False
 
     env.storage_controller.pageserver_api().tenant_delete(tenant_id)
diff --git a/test_runner/regress/test_s3_scrubber.py b/test_runner/regress/test_storage_scrubber.py
similarity index 94%
rename from test_runner/regress/test_s3_scrubber.py
rename to test_runner/regress/test_storage_scrubber.py
index 6baba190f3..35ae61c380 100644
--- a/test_runner/regress/test_s3_scrubber.py
+++ b/test_runner/regress/test_storage_scrubber.py
@@ -6,7 +6,7 @@ import pytest
 from fixtures.common_types import TenantId, TenantShardId, TimelineId
 from fixtures.neon_fixtures import (
     NeonEnvBuilder,
-    S3Scrubber,
+    StorageScrubber,
 )
 from fixtures.remote_storage import S3Storage, s3_storage
 from fixtures.workload import Workload
@@ -60,7 +60,7 @@ def test_scrubber_tenant_snapshot(neon_env_builder: NeonEnvBuilder, shard_count:
 
     output_path = neon_env_builder.test_output_dir / "snapshot"
     os.makedirs(output_path)
-    scrubber = S3Scrubber(neon_env_builder)
+    scrubber = StorageScrubber(neon_env_builder)
     scrubber.tenant_snapshot(tenant_id, output_path)
 
     assert len(os.listdir(output_path)) > 0
@@ -143,18 +143,18 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt
     workload.write_rows(1)
 
     # With a high min_age, the scrubber should decline to delete anything
-    gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=3600)
+    gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=3600)
     assert gc_summary["remote_storage_errors"] == 0
     assert gc_summary["indices_deleted"] == 0
 
     # If targeting a different tenant, the scrubber shouldn't do anything
-    gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(
+    gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(
         min_age_secs=1, tenant_ids=[TenantId.generate()]
     )
     assert gc_summary["remote_storage_errors"] == 0
     assert gc_summary["indices_deleted"] == 0
 
     # With a low min_age, the scrubber should go ahead and clean up all but the latest 2 generations
-    gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
+    gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
     assert gc_summary["remote_storage_errors"] == 0
     assert gc_summary["indices_deleted"] == (expect_indices_per_shard - 2) * shard_count
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index fa7cead1bd..fd3cc45c3f 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -10,7 +10,7 @@ from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnvBuilder,
     PgBin,
-    S3Scrubber,
+    StorageScrubber,
     last_flush_lsn_upload,
     wait_for_last_flush_lsn,
 )
@@ -707,7 +707,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder)
     remote_storage_kind = RemoteStorageKind.MOCK_S3
     neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
 
-    scrubber = S3Scrubber(neon_env_builder)
+    scrubber = StorageScrubber(neon_env_builder)
     env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
     ps_http = env.pageserver.http_client()