From d5ae9db997711d770b52511f8bbd2eef8067cedc Mon Sep 17 00:00:00 2001
From: bojanserafimov
Date: Thu, 14 Apr 2022 10:09:03 -0400
Subject: [PATCH] Add s3 cost estimate to tests (#1478)

---
 pageserver/src/layered_repository.rs      | 22 ++++++++++++++++-
 test_runner/fixtures/benchmark_fixture.py | 30 ++++++++++-------------
 test_runner/fixtures/compare_fixtures.py  | 13 ++++++++++
 3 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs
index 95df385cfe..36b081e400 100644
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -49,7 +49,8 @@ use crate::CheckpointConfig;
 use crate::{ZTenantId, ZTimelineId};
 
 use zenith_metrics::{
-    register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec, IntGauge, IntGaugeVec,
+    register_histogram_vec, register_int_counter, register_int_gauge_vec, Histogram, HistogramVec,
+    IntCounter, IntGauge, IntGaugeVec,
 };
 use zenith_utils::crashsafe_dir;
 use zenith_utils::lsn::{AtomicLsn, Lsn, RecordLsn};
@@ -109,6 +110,21 @@ lazy_static! {
     .expect("failed to define a metric");
 }
 
+// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
+// or in testing they estimate how much we would upload if we did.
+lazy_static! {
+    static ref NUM_PERSISTENT_FILES_CREATED: IntCounter = register_int_counter!(
+        "pageserver_num_persistent_files_created",
+        "Number of files created that are meant to be uploaded to cloud storage",
+    )
+    .expect("failed to define a metric");
+    static ref PERSISTENT_BYTES_WRITTEN: IntCounter = register_int_counter!(
+        "pageserver_persistent_bytes_written",
+        "Total bytes written that are meant to be uploaded to cloud storage",
+    )
+    .expect("failed to define a metric");
+}
+
 /// Parts of the `.zenith/tenants//timelines/` directory prefix.
 pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
 
@@ -1524,6 +1540,10 @@ impl LayeredTimeline {
             &metadata,
             false,
         )?;
+
+        NUM_PERSISTENT_FILES_CREATED.inc_by(1);
+        PERSISTENT_BYTES_WRITTEN.inc_by(new_delta_path.metadata()?.len());
+
         if self.upload_layers.load(atomic::Ordering::Relaxed) {
             schedule_timeline_checkpoint_upload(
                 self.tenantid,
diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py
index a904233e98..0735f16d73 100644
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -236,10 +236,18 @@ class ZenithBenchmarker:
         """
         Fetch the "cumulative # of bytes written" metric from the pageserver
         """
-        # Fetch all the exposed prometheus metrics from page server
-        all_metrics = pageserver.http_client().get_metrics()
-        # Use a regular expression to extract the one we're interested in
-        #
+        metric_name = r'pageserver_disk_io_bytes{io_operation="write"}'
+        return self.get_int_counter_value(pageserver, metric_name)
+
+    def get_peak_mem(self, pageserver) -> int:
+        """
+        Fetch the "maxrss" metric from the pageserver
+        """
+        metric_name = r'pageserver_maxrss_kb'
+        return self.get_int_counter_value(pageserver, metric_name)
+
+    def get_int_counter_value(self, pageserver, metric_name) -> int:
+        """Fetch the value of given int counter from pageserver metrics."""
         # TODO: If we start to collect more of the prometheus metrics in the
         # performance test suite like this, we should refactor this to load and
         # parse all the metrics into a more convenient structure in one go.
@@ -247,20 +255,8 @@ class ZenithBenchmarker:
         # The metric should be an integer, as it's a number of bytes. But in general
         # all prometheus metrics are floats. So to be pedantic, read it as a float
         # and round to integer.
-        matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$',
-                            all_metrics,
-                            re.MULTILINE)
-        assert matches
-        return int(round(float(matches.group(1))))
-
-    def get_peak_mem(self, pageserver) -> int:
-        """
-        Fetch the "maxrss" metric from the pageserver
-        """
-        # Fetch all the exposed prometheus metrics from page server
         all_metrics = pageserver.http_client().get_metrics()
-        # See comment in get_io_writes()
-        matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE)
+        matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE)
         assert matches
         return int(round(float(matches.group(1))))
 
diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py
index 3c6a923587..93912d2da7 100644
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -105,6 +105,19 @@ class ZenithCompare(PgCompare):
                                  'MB',
                                  report=MetricReport.LOWER_IS_BETTER)
 
+        total_files = self.zenbenchmark.get_int_counter_value(
+            self.env.pageserver, "pageserver_num_persistent_files_created")
+        total_bytes = self.zenbenchmark.get_int_counter_value(
+            self.env.pageserver, "pageserver_persistent_bytes_written")
+        self.zenbenchmark.record("data_uploaded",
+                                 total_bytes / (1024 * 1024),
+                                 "MB",
+                                 report=MetricReport.LOWER_IS_BETTER)
+        self.zenbenchmark.record("num_files_uploaded",
+                                 total_files,
+                                 "",
+                                 report=MetricReport.LOWER_IS_BETTER)
+
     def record_pageserver_writes(self, out_name):
         return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name)
 