From 82484e82415c85da2dac32c924c514d95efb81ab Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Thu, 1 Jun 2023 14:46:04 -0400 Subject: [PATCH] pgserver: add more metrics for better observability (#4323) ## Problem This PR includes doc changes to the current metrics as well as adding new metrics. With the new set of metrics, we can quantitatively analyze the read amp., write amp. and space amp. in the system, when used together with https://github.com/neondatabase/neonbench close https://github.com/neondatabase/neon/issues/4312 ref https://github.com/neondatabase/neon/issues/4347 compaction metrics TBD, a novel idea is to print L0 file number and number of layers in the system, and we can do this in the future when we start working on compaction. ## Summary of changes * Add `READ_NUM_FS_LAYERS` for computing read amp. * Add `MATERIALIZED_PAGE_CACHE_HIT_UPON_REQUEST`. * Add `GET_RECONSTRUCT_DATA_TIME`. GET_RECONSTRUCT_DATA_TIME + RECONSTRUCT_TIME + WAIT_LSN_TIME should be approximately total time of reads. * Add `5.0` and `10.0` to `STORAGE_IO_TIME_BUCKETS` given some fsync runs slow (i.e., > 1s) in some cases. * Some `WAL_REDO` metrics are only used when Postgres is involved in the redo process. --------- Signed-off-by: Alex Chi --- pageserver/src/metrics.rs | 55 +++++++++++++++++++++++++++++-- pageserver/src/tenant/timeline.rs | 15 ++++++++- test_runner/fixtures/metrics.py | 7 ++++ 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 75bea9dbab..cc444c479a 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -84,6 +84,16 @@ pub static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static READ_NUM_FS_LAYERS: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_read_num_fs_layers", + "Number of persistent layers accessed for processing a read request, including those in the cache", + &["tenant_id", "timeline_id"], + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0], + ) + .expect("failed to define a metric") +}); + // Metrics collected on operations on the storage repository. static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { register_histogram_vec!( @@ -95,6 +105,25 @@ static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_materialized_cache_hits_direct_total", + "Number of cache hits from materialized page cache without redo", + &["tenant_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + +static GET_RECONSTRUCT_DATA_TIME: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_getpage_get_reconstruct_data_seconds", + "Time spent in get_reconstruct_value_data", + &["tenant_id", "timeline_id"], + CRITICAL_OP_BUCKETS.into(), + ) + .expect("failed to define a metric") +}); + static MATERIALIZED_PAGE_CACHE_HIT: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_materialized_cache_hits_total", @@ -354,6 +383,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[ 0.001000, // 1000 usec 0.030, // 30 ms 1.000, // 1000 ms + 30.000, // 30000 ms ]; const STORAGE_IO_TIME_OPERATIONS: &[&str] = &[ @@ -622,7 +652,7 @@ pub static WAL_REDO_TIME: Lazy = Lazy::new(|| { pub static WAL_REDO_WAIT_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_wait_seconds", - "Time spent waiting for access to the WAL redo process", + "Time spent waiting for access to the Postgres WAL redo process", redo_histogram_time_buckets!(), ) .expect("failed to define a metric") @@ -631,7 +661,7 @@ pub static WAL_REDO_WAIT_TIME: Lazy = Lazy::new(|| { pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_records_histogram", - "Histogram of number of records replayed per redo", + "Histogram of number of records replayed per redo in the Postgres WAL redo process", redo_histogram_count_buckets!(), ) .expect("failed to define a metric") @@ -640,7 +670,7 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { pub static WAL_REDO_BYTES_HISTOGRAM: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_bytes_histogram", - "Histogram of number of records replayed per redo", + "Histogram of number of records replayed per redo sent to Postgres", redo_bytes_histogram_count_buckets!(), ) .expect("failed to define a metric") @@ -723,7 +753,9 @@ pub struct TimelineMetrics { tenant_id: String, timeline_id: String, pub reconstruct_time_histo: Histogram, + pub get_reconstruct_data_time_histo: Histogram, pub materialized_page_cache_hit_counter: GenericCounter, + pub materialized_page_cache_hit_upon_request_counter: GenericCounter, pub flush_time_histo: StorageTimeMetrics, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, @@ -734,6 +766,7 @@ pub struct TimelineMetrics { pub last_record_gauge: IntGauge, pub wait_lsn_time_histo: Histogram, pub resident_physical_size_gauge: UIntGauge, + pub read_num_fs_layers: Histogram, /// copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, pub num_persistent_files_created: IntCounter, @@ -753,6 +786,9 @@ impl TimelineMetrics { let reconstruct_time_histo = RECONSTRUCT_TIME .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); + let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); @@ -794,6 +830,12 @@ impl TimelineMetrics { let evictions = EVICTIONS .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); + let read_num_fs_layers = READ_NUM_FS_LAYERS + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); + let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id); @@ -801,7 +843,9 @@ impl TimelineMetrics { tenant_id, timeline_id, reconstruct_time_histo, + get_reconstruct_data_time_histo, materialized_page_cache_hit_counter, + materialized_page_cache_hit_upon_request_counter, flush_time_histo, compact_time_histo, create_images_time_histo, @@ -819,6 +863,7 @@ impl TimelineMetrics { evictions_with_low_residence_duration: std::sync::RwLock::new( evictions_with_low_residence_duration, ), + read_num_fs_layers, } } } @@ -828,7 +873,9 @@ impl Drop for TimelineMetrics { let tenant_id = &self.tenant_id; let timeline_id = &self.timeline_id; let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]); + let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]); let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]); + let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]); let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]); let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]); let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); @@ -836,6 +883,8 @@ impl Drop for TimelineMetrics { let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]); let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]); + let _ = READ_NUM_FS_LAYERS.remove_label_values(&[tenant_id, timeline_id]); + self.evictions_with_low_residence_duration .write() .unwrap() diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 36c4b0bcd4..8885e761a2 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -525,7 +525,12 @@ impl Timeline { Some((cached_lsn, cached_img)) => { match cached_lsn.cmp(&lsn) { Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check - Ordering::Equal => return Ok(cached_img), // exact LSN match, return the image + Ordering::Equal => { + self.metrics + .materialized_page_cache_hit_upon_request_counter + .inc(); + return Ok(cached_img); // exact LSN match, return the image + } Ordering::Greater => { unreachable!("the returned lsn should never be after the requested lsn") } @@ -540,8 +545,10 @@ impl Timeline { img: cached_page_img, }; + let timer = self.metrics.get_reconstruct_data_time_histo.start_timer(); self.get_reconstruct_data(key, lsn, &mut reconstruct_state, ctx) .await?; + timer.stop_and_record(); self.metrics .reconstruct_time_histo @@ -2261,6 +2268,9 @@ impl Timeline { let mut timeline_owned; let mut timeline = self; + let mut read_count = + scopeguard::guard(0, |cnt| self.metrics.read_num_fs_layers.observe(cnt as f64)); + // For debugging purposes, collect the path of layers that we traversed // through. It's included in the error message if we fail to find the key. let mut traversal_path = Vec::::new(); @@ -2395,6 +2405,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + // metrics: open_layer does not count as fs access, so we are not updating `read_count` traversal_path.push(( result, cont_lsn, @@ -2421,6 +2432,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + // metrics: open_layer does not count as fs access, so we are not updating `read_count` traversal_path.push(( result, cont_lsn, @@ -2455,6 +2467,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + *read_count += 1; traversal_path.push(( result, cont_lsn, diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 0e958ddd06..b4c237cfa6 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -65,12 +65,19 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_getpage_reconstruct_seconds_bucket", "pageserver_getpage_reconstruct_seconds_count", "pageserver_getpage_reconstruct_seconds_sum", + "pageserver_getpage_get_reconstruct_data_seconds_bucket", + "pageserver_getpage_get_reconstruct_data_seconds_count", + "pageserver_getpage_get_reconstruct_data_seconds_sum", "pageserver_io_operations_bytes_total", "pageserver_io_operations_seconds_bucket", "pageserver_io_operations_seconds_count", "pageserver_io_operations_seconds_sum", "pageserver_last_record_lsn", "pageserver_materialized_cache_hits_total", + "pageserver_materialized_cache_hits_direct_total", + "pageserver_read_num_fs_layers_bucket", + "pageserver_read_num_fs_layers_count", + "pageserver_read_num_fs_layers_sum", "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", "pageserver_smgr_query_seconds_sum",