Rename S3 scrubber to storage scrubber (#8013)

The S3 scrubber contains "S3" in its name, but we want to make it
generic in terms of which storage is used (#7547). Therefore, rename it
to "storage scrubber", following the naming scheme of already existing
components "storage broker" and "storage controller".

Part of #7547
This commit is contained in:
Arpad Müller
2024-06-12 00:45:22 +02:00
committed by GitHub
parent 78a59b94f5
commit 27518676d7
21 changed files with 75 additions and 75 deletions

View File

@@ -21,7 +21,7 @@
!patches/
!pgxn/
!proxy/
!s3_scrubber/
!storage_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/

96
Cargo.lock generated
View File

@@ -5109,54 +5109,6 @@ version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
[[package]]
name = "s3_scrubber"
version = "0.1.0"
dependencies = [
"anyhow",
"async-stream",
"aws-config",
"aws-sdk-s3",
"aws-smithy-async",
"bincode",
"bytes",
"camino",
"chrono",
"clap",
"crc32c",
"either",
"futures",
"futures-util",
"hex",
"histogram",
"humantime",
"itertools",
"once_cell",
"pageserver",
"pageserver_api",
"postgres_ffi",
"rand 0.8.5",
"remote_storage",
"reqwest 0.12.4",
"rustls 0.22.4",
"rustls-native-certs 0.7.0",
"serde",
"serde_json",
"serde_with",
"thiserror",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-rustls 0.25.0",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-appender",
"tracing-subscriber",
"utils",
"workspace_hack",
]
[[package]]
name = "safekeeper"
version = "0.1.0"
@@ -5813,6 +5765,54 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "storage_scrubber"
version = "0.1.0"
dependencies = [
"anyhow",
"async-stream",
"aws-config",
"aws-sdk-s3",
"aws-smithy-async",
"bincode",
"bytes",
"camino",
"chrono",
"clap",
"crc32c",
"either",
"futures",
"futures-util",
"hex",
"histogram",
"humantime",
"itertools",
"once_cell",
"pageserver",
"pageserver_api",
"postgres_ffi",
"rand 0.8.5",
"remote_storage",
"reqwest 0.12.4",
"rustls 0.22.4",
"rustls-native-certs 0.7.0",
"serde",
"serde_json",
"serde_with",
"thiserror",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-rustls 0.25.0",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-appender",
"tracing-subscriber",
"utils",
"workspace_hack",
]
[[package]]
name = "storcon_cli"
version = "0.1.0"

View File

@@ -13,7 +13,7 @@ members = [
"safekeeper",
"storage_broker",
"storage_controller",
"s3_scrubber",
"storage_scrubber",
"workspace_hack",
"trace",
"libs/compute_api",

View File

@@ -1,5 +1,5 @@
[package]
name = "s3_scrubber"
name = "storage_scrubber"
version = "0.1.0"
edition.workspace = true
license.workspace = true

View File

@@ -1,4 +1,4 @@
# Neon S3 scrubber
# Neon Storage Scrubber
This tool directly accesses the S3 buckets used by the Neon `pageserver`
and `safekeeper`, and does housekeeping such as cleaning up objects for tenants & timelines that no longer exist.

View File

@@ -1,11 +1,11 @@
use anyhow::bail;
use camino::Utf8PathBuf;
use pageserver_api::shard::TenantShardId;
use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
use s3_scrubber::pageserver_physical_gc::GcMode;
use s3_scrubber::scan_pageserver_metadata::scan_metadata;
use s3_scrubber::tenant_snapshot::SnapshotDownloader;
use s3_scrubber::{
use storage_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
use storage_scrubber::pageserver_physical_gc::GcMode;
use storage_scrubber::scan_pageserver_metadata::scan_metadata;
use storage_scrubber::tenant_snapshot::SnapshotDownloader;
use storage_scrubber::{
init_logging, pageserver_physical_gc::pageserver_physical_gc,
scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig, NodeKind,
TraversingDepth,

View File

@@ -833,7 +833,7 @@ class NeonEnvBuilder:
def enable_scrub_on_exit(self):
"""
Call this if you would like the fixture to automatically run
s3_scrubber at the end of the test, as a bidirectional test
storage_scrubber at the end of the test, as a bidirectional test
that the scrubber is working properly, and that the code within
the test didn't produce any invalid remote state.
"""
@@ -948,7 +948,7 @@ class NeonEnvBuilder:
if self.scrub_on_exit:
try:
S3Scrubber(self).scan_metadata()
StorageScrubber(self).scan_metadata()
except Exception as e:
log.error(f"Error during remote storage scrub: {e}")
cleanup_error = e
@@ -3937,7 +3937,7 @@ class Safekeeper(LogUtils):
wait_until(20, 0.5, paused)
class S3Scrubber:
class StorageScrubber:
def __init__(self, env: NeonEnvBuilder, log_dir: Optional[Path] = None):
self.env = env
self.log_dir = log_dir or env.test_output_dir
@@ -3957,7 +3957,7 @@ class S3Scrubber:
if s3_storage.endpoint is not None:
env.update({"AWS_ENDPOINT_URL": s3_storage.endpoint})
base_args = [str(self.env.neon_binpath / "s3_scrubber")]
base_args = [str(self.env.neon_binpath / "storage_scrubber")]
args = base_args + args
(output_path, stdout, status_code) = subprocess_capture(

View File

@@ -22,7 +22,7 @@ from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
S3Scrubber,
StorageScrubber,
generate_uploads_and_deletions,
)
from fixtures.pageserver.common_types import parse_layer_file_name
@@ -215,7 +215,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
# Having written a mixture of generation-aware and legacy index_part.json,
# ensure the scrubber handles the situation as expected.
metadata_summary = S3Scrubber(neon_env_builder).scan_metadata()
metadata_summary = StorageScrubber(neon_env_builder).scan_metadata()
assert metadata_summary["tenant_count"] == 1 # Scrubber should have seen our timeline
assert metadata_summary["timeline_count"] == 1
assert metadata_summary["timeline_shard_count"] == 1

View File

@@ -7,7 +7,7 @@ from typing import Any, Dict, Optional
import pytest
from fixtures.common_types import TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, S3Scrubber
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, StorageScrubber
from fixtures.pageserver.common_types import parse_layer_file_name
from fixtures.pageserver.utils import (
assert_prefix_empty,
@@ -214,7 +214,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
# Having done a bunch of attach/detach cycles, we will have generated some index garbage: check
# that the scrubber sees it and cleans it up. We do this before the final attach+validate pass,
# to also validate that the scrubber isn't breaking anything.
gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
assert gc_summary["remote_storage_errors"] == 0
assert gc_summary["indices_deleted"] > 0
@@ -536,7 +536,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
# Scrub the remote storage
# ========================
# This confirms that the scrubber isn't upset by the presence of the heatmap
S3Scrubber(neon_env_builder).scan_metadata()
StorageScrubber(neon_env_builder).scan_metadata()
# Detach secondary and delete tenant
# ===================================

View File

@@ -11,8 +11,8 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
S3Scrubber,
StorageControllerApiException,
StorageScrubber,
last_flush_lsn_upload,
tenant_get_shards,
wait_for_last_flush_lsn,
@@ -128,7 +128,7 @@ def test_sharding_smoke(
# Check the scrubber isn't confused by sharded content, then disable
# it during teardown because we'll have deleted by then
S3Scrubber(neon_env_builder).scan_metadata()
StorageScrubber(neon_env_builder).scan_metadata()
neon_env_builder.scrub_on_exit = False
env.storage_controller.pageserver_api().tenant_delete(tenant_id)

View File

@@ -6,7 +6,7 @@ import pytest
from fixtures.common_types import TenantId, TenantShardId, TimelineId
from fixtures.neon_fixtures import (
NeonEnvBuilder,
S3Scrubber,
StorageScrubber,
)
from fixtures.remote_storage import S3Storage, s3_storage
from fixtures.workload import Workload
@@ -60,7 +60,7 @@ def test_scrubber_tenant_snapshot(neon_env_builder: NeonEnvBuilder, shard_count:
output_path = neon_env_builder.test_output_dir / "snapshot"
os.makedirs(output_path)
scrubber = S3Scrubber(neon_env_builder)
scrubber = StorageScrubber(neon_env_builder)
scrubber.tenant_snapshot(tenant_id, output_path)
assert len(os.listdir(output_path)) > 0
@@ -143,18 +143,18 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt
workload.write_rows(1)
# With a high min_age, the scrubber should decline to delete anything
gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=3600)
gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=3600)
assert gc_summary["remote_storage_errors"] == 0
assert gc_summary["indices_deleted"] == 0
# If targeting a different tenant, the scrubber shouldn't do anything
gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(
gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(
min_age_secs=1, tenant_ids=[TenantId.generate()]
)
assert gc_summary["remote_storage_errors"] == 0
assert gc_summary["indices_deleted"] == 0
# With a low min_age, the scrubber should go ahead and clean up all but the latest 2 generations
gc_summary = S3Scrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
gc_summary = StorageScrubber(neon_env_builder).pageserver_physical_gc(min_age_secs=1)
assert gc_summary["remote_storage_errors"] == 0
assert gc_summary["indices_deleted"] == (expect_indices_per_shard - 2) * shard_count

View File

@@ -10,7 +10,7 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
S3Scrubber,
StorageScrubber,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
@@ -707,7 +707,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder)
remote_storage_kind = RemoteStorageKind.MOCK_S3
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
scrubber = S3Scrubber(neon_env_builder)
scrubber = StorageScrubber(neon_env_builder)
env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
ps_http = env.pageserver.http_client()