From 8fbe437768e2545855a0f928794abc679b7f9d0b Mon Sep 17 00:00:00 2001 From: bojanserafimov Date: Tue, 18 Oct 2022 11:53:28 -0400 Subject: [PATCH] Improve pageserver IO metrics (#2629) --- pageserver/src/metrics.rs | 20 ++++++++++++++------ test_runner/fixtures/compare_fixtures.py | 5 +++-- test_runner/fixtures/metrics.py | 2 ++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 5c2f81d731..b654be031c 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -107,18 +107,20 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage, // or in testing they estimate how much we would upload if we did. -static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| { - register_int_counter!( +static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| { + register_int_counter_vec!( "pageserver_created_persistent_files_total", "Number of files created that are meant to be uploaded to cloud storage", + &["tenant_id", "timeline_id"] ) .expect("failed to define a metric") }); -static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| { - register_int_counter!( +static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| { + register_int_counter_vec!( "pageserver_written_persistent_bytes_total", "Total bytes written that are meant to be uploaded to cloud storage", + &["tenant_id", "timeline_id"] ) .expect("failed to define a metric") }); @@ -386,8 +388,12 @@ impl TimelineMetrics { let current_logical_size_gauge = CURRENT_LOGICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); - let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED.clone(); - let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN.clone(); + let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); + let persistent_bytes_written = 
PERSISTENT_BYTES_WRITTEN + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); TimelineMetrics { tenant_id, @@ -419,6 +425,8 @@ impl Drop for TimelineMetrics { let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]); let _ = CURRENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); + let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]); + let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); for op in STORAGE_TIME_OPERATIONS { let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]); diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index 78a12c6c45..2d36d90bd6 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -130,11 +130,12 @@ class NeonCompare(PgCompare): "size", timeline_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER ) + params = f'{{tenant_id="{self.env.initial_tenant}",timeline_id="{self.timeline}"}}' total_files = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_created_persistent_files_total" + self.env.pageserver, "pageserver_created_persistent_files_total" + params ) total_bytes = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_written_persistent_bytes_total" + self.env.pageserver, "pageserver_written_persistent_bytes_total" + params ) self.zenbenchmark.record( "data_uploaded", total_bytes / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 4d680aa641..62e3cbbe99 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -60,4 +60,6 @@ PAGESERVER_PER_TENANT_METRICS = [ "pageserver_wait_lsn_seconds_bucket", "pageserver_wait_lsn_seconds_count", "pageserver_wait_lsn_seconds_sum", 
+ "pageserver_created_persistent_files_total", + "pageserver_written_persistent_bytes_total", ]