Test prefetch miss

Create Release PR: review fixes
Replace per timeline pageserver_storage_operations_seconds with a global one (#3409 )
2026-06-01 12:30:38 +00:00 · 2023-01-30 16:24:51 -05:00 · 2023-01-30 16:15:22 +01:00 · 2023-01-30 17:10:29 +02:00 · 2023-01-30 16:08:23 +02:00 · 2023-01-30 14:00:44 +01:00
10 changed files with 215 additions and 64 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -337,7 +337,7 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  benchmarks:
-    runs-on: [ self-hosted, gen3, large ]
+    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -405,7 +405,7 @@ jobs:
          DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json

  coverage-report:
-    runs-on: [ self-hosted, gen3, large ]
+    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -481,7 +481,7 @@ jobs:
            }"

  trigger-e2e-tests:
-    runs-on: [ self-hosted, gen3, large ]
+    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -0,0 +1,33 @@
+name: Create Release Branch
+
+on:
+  schedule:
+    - cron: '0 10 * * 2'
+
+jobs:
+  create_release_branch:
+    runs-on: [ubuntu-latest]
+
+    steps:
+    - name: Check out code
+      uses: actions/checkout@v3
+      with:
+        ref: main
+
+    - name: Get current date
+      id: date
+      run: echo "date=(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+    - name: Create release branch
+      run: git checkout -b release/${{ steps.date.outputs.date }}
+
+    - name: Push new branch
+      run: git push origin release/${{ steps.date.outputs.date }}
+
+    - name: Create pull request into release
+      uses: thomaseizinger/create-pull-request@e3972219c86a56550fb70708d96800d8e24ba862 # 1.3.0
+      with:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        head: release/${{ steps.date.outputs.date }}
+        base: release
+        title: Release ${{ steps.date.outputs.date }}
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -8,6 +8,7 @@ pub use prometheus::opts;
 pub use prometheus::register;
 pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
+pub use prometheus::{register_counter_vec, Counter, CounterVec};
 pub use prometheus::{register_gauge, Gauge};
 pub use prometheus::{register_gauge_vec, GaugeVec};
 pub use prometheus::{register_histogram, Histogram};
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -87,9 +87,7 @@ fn apierror_from_prerror(err: PageReconstructError) -> ApiError {
        PageReconstructError::NeedsDownload(_, _) => {
            // This shouldn't happen, because we use a RequestContext that requests to
            // download any missing layer files on-demand.
-            ApiError::InternalServerError(anyhow::anyhow!(
-                "would need to download remote layer file"
-            ))
+            ApiError::InternalServerError(anyhow::anyhow!("need to download remote layer file"))
        }
        PageReconstructError::Cancelled => {
            ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,8 +1,9 @@
 use metrics::core::{AtomicU64, GenericCounter};
 use metrics::{
-    register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
-    register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, Histogram, HistogramVec,
-    IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
+    register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
+    Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
+    UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::models::state;
@@ -36,11 +37,29 @@ const STORAGE_TIME_OPERATIONS: &[&str] = &[
    "gc",
 ];

-pub static STORAGE_TIME: Lazy<HistogramVec> = Lazy::new(|| {
-    register_histogram_vec!(
-        "pageserver_storage_operations_seconds",
-        "Time spent on storage operations",
+pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
+    register_counter_vec!(
+        "pageserver_storage_operations_seconds_sum",
+        "Total time spent on storage operations with operation, tenant and timeline dimensions",
        &["operation", "tenant_id", "timeline_id"],
+    )
+    .expect("failed to define a metric")
+});
+
+pub static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_storage_operations_seconds_count",
+        "Count of storage operations with operation, tenant and timeline dimensions",
+        &["operation", "tenant_id", "timeline_id"],
+    )
+    .expect("failed to define a metric")
+});
+
+pub static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "pageserver_storage_operations_seconds_global",
+        "Time spent on storage operations",
+        &["operation"],
        get_buckets_for_critical_operations(),
    )
    .expect("failed to define a metric")
@@ -394,18 +413,81 @@ pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    .unwrap()
 });

+/// Similar to [`prometheus::HistogramTimer`] but does not record on drop.
+pub struct StorageTimeMetricsTimer {
+    metrics: StorageTimeMetrics,
+    start: Instant,
+}
+
+impl StorageTimeMetricsTimer {
+    fn new(metrics: StorageTimeMetrics) -> Self {
+        Self {
+            metrics,
+            start: Instant::now(),
+        }
+    }
+
+    /// Record the time from creation to now.
+    pub fn stop_and_record(self) {
+        let duration = self.start.elapsed().as_secs_f64();
+        self.metrics.timeline_sum.inc_by(duration);
+        self.metrics.timeline_count.inc();
+        self.metrics.global_histogram.observe(duration);
+    }
+}
+
+/// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
+/// timeline total sum and count.
+#[derive(Clone, Debug)]
+pub struct StorageTimeMetrics {
+    /// Sum of f64 seconds, per operation, tenant_id and timeline_id
+    timeline_sum: Counter,
+    /// Number of oeprations, per operation, tenant_id and timeline_id
+    timeline_count: IntCounter,
+    /// Global histogram having only the "operation" label.
+    global_histogram: Histogram,
+}
+
+impl StorageTimeMetrics {
+    pub fn new(operation: &str, tenant_id: &str, timeline_id: &str) -> Self {
+        let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
+            .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
+            .unwrap();
+        let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
+            .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
+            .unwrap();
+        let global_histogram = STORAGE_TIME_GLOBAL
+            .get_metric_with_label_values(&[operation])
+            .unwrap();
+
+        StorageTimeMetrics {
+            timeline_sum,
+            timeline_count,
+            global_histogram,
+        }
+    }
+
+    /// Starts timing a new operation.
+    ///
+    /// Note: unlike [`prometheus::HistogramTimer`] the returned timer does not record on drop.
+    pub fn start_timer(&self) -> StorageTimeMetricsTimer {
+        StorageTimeMetricsTimer::new(self.clone())
+    }
+}
+
 #[derive(Debug)]
 pub struct TimelineMetrics {
    tenant_id: String,
    timeline_id: String,
    pub reconstruct_time_histo: Histogram,
    pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
-    pub flush_time_histo: Histogram,
-    pub compact_time_histo: Histogram,
-    pub create_images_time_histo: Histogram,
-    pub init_logical_size_histo: Histogram,
-    pub logical_size_histo: Histogram,
-    pub load_layer_map_histo: Histogram,
+    pub flush_time_histo: StorageTimeMetrics,
+    pub compact_time_histo: StorageTimeMetrics,
+    pub create_images_time_histo: StorageTimeMetrics,
+    pub init_logical_size_histo: StorageTimeMetrics,
+    pub logical_size_histo: StorageTimeMetrics,
+    pub load_layer_map_histo: StorageTimeMetrics,
+    pub garbage_collect_histo: StorageTimeMetrics,
    pub last_record_gauge: IntGauge,
    pub wait_lsn_time_histo: Histogram,
    pub resident_physical_size_gauge: UIntGauge,
@@ -425,24 +507,16 @@ impl TimelineMetrics {
        let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
-        let flush_time_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["layer flush", &tenant_id, &timeline_id])
-            .unwrap();
-        let compact_time_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["compact", &tenant_id, &timeline_id])
-            .unwrap();
-        let create_images_time_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["create images", &tenant_id, &timeline_id])
-            .unwrap();
-        let init_logical_size_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["init logical size", &tenant_id, &timeline_id])
-            .unwrap();
-        let logical_size_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["logical size", &tenant_id, &timeline_id])
-            .unwrap();
-        let load_layer_map_histo = STORAGE_TIME
-            .get_metric_with_label_values(&["load layer map", &tenant_id, &timeline_id])
-            .unwrap();
+        let flush_time_histo = StorageTimeMetrics::new("layer flush", &tenant_id, &timeline_id);
+        let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id);
+        let create_images_time_histo =
+            StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
+        let init_logical_size_histo =
+            StorageTimeMetrics::new("init logical size", &tenant_id, &timeline_id);
+        let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
+        let load_layer_map_histo =
+            StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
+        let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id);
        let last_record_gauge = LAST_RECORD_LSN
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
@@ -472,6 +546,7 @@ impl TimelineMetrics {
            create_images_time_histo,
            init_logical_size_histo,
            logical_size_histo,
+            garbage_collect_histo,
            load_layer_map_histo,
            last_record_gauge,
            wait_lsn_time_histo,
@@ -497,7 +572,10 @@ impl Drop for TimelineMetrics {
        let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);

        for op in STORAGE_TIME_OPERATIONS {
-            let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
+            let _ =
+                STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
+            let _ =
+                STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
        }
        for op in STORAGE_IO_TIME_OPERATIONS {
            let _ = STORAGE_IO_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
@@ -514,7 +592,6 @@ impl Drop for TimelineMetrics {
 }

 pub fn remove_tenant_metrics(tenant_id: &TenantId) {
-    let _ = STORAGE_TIME.remove_label_values(&["gc", &tenant_id.to_string(), "-"]);
    let tid = tenant_id.to_string();
    for state in TENANT_STATE_OPTIONS {
        let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]);
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -51,8 +51,7 @@ use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir;
 use crate::is_uninit_mark;
-use crate::metrics::TENANT_STATE_METRIC;
-use crate::metrics::{remove_tenant_metrics, STORAGE_TIME};
+use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC};
 use crate::repository::GcResult;
 use crate::task_mgr;
 use crate::task_mgr::TaskKind;
@@ -1243,17 +1242,11 @@ impl Tenant {
            "Cannot run GC iteration on inactive tenant"
        );

-        let timeline_str = target_timeline_id
-            .map(|x| x.to_string())
-            .unwrap_or_else(|| "-".to_string());
+        let gc_result = self
+            .gc_iteration_internal(target_timeline_id, horizon, pitr, ctx)
+            .await;

-        {
-            let _timer = STORAGE_TIME
-                .with_label_values(&["gc", &self.tenant_id.to_string(), &timeline_str])
-                .start_timer();
-            self.gc_iteration_internal(target_timeline_id, horizon, pitr, ctx)
-                .await
-        }
+        gc_result
    }

    /// Perform one compaction iteration.
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -15,6 +15,7 @@ use pageserver_api::models::{
 use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
+use utils::id::TenantTimelineId;

 use std::cmp::{max, min, Ordering};
 use std::collections::HashMap;
@@ -132,7 +133,6 @@ pub struct Timeline {
    ancestor_timeline: Option<Arc<Timeline>>,
    ancestor_lsn: Lsn,

-    // Metrics
    metrics: TimelineMetrics,

    /// Ensures layers aren't frozen by checkpointer between
@@ -382,7 +382,7 @@ pub enum PageReconstructError {
    Other(#[from] anyhow::Error), // source and Display delegate to anyhow::Error

    /// The operation would require downloading a layer that is missing locally.
-    NeedsDownload(Weak<Timeline>, Weak<RemoteLayer>),
+    NeedsDownload(TenantTimelineId, LayerFileName),

    /// The operation was cancelled
    Cancelled,
@@ -396,7 +396,14 @@ impl std::fmt::Debug for PageReconstructError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        match self {
            Self::Other(err) => err.fmt(f),
-            Self::NeedsDownload(_tli, _layer) => write!(f, "needs download"),
+            Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
+                write!(
+                    f,
+                    "layer {}/{} needs download",
+                    tenant_timeline_id,
+                    layer_file_name.file_name()
+                )
+            }
            Self::Cancelled => write!(f, "cancelled"),
            Self::WalRedo(err) => err.fmt(f),
        }
@@ -407,7 +414,14 @@ impl std::fmt::Display for PageReconstructError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        match self {
            Self::Other(err) => err.fmt(f),
-            Self::NeedsDownload(_tli, _layer) => write!(f, "needs download"),
+            Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
+                write!(
+                    f,
+                    "layer {}/{} needs download",
+                    tenant_timeline_id,
+                    layer_file_name.file_name()
+                )
+            }
            Self::Cancelled => write!(f, "cancelled"),
            Self::WalRedo(err) => err.fmt(f),
        }
@@ -1879,8 +1893,8 @@ impl Timeline {
                    }
                    (DownloadBehavior::Error, false) => {
                        return Err(PageReconstructError::NeedsDownload(
-                            timeline.myself.clone(),
-                            Arc::downgrade(&remote_layer),
+                            TenantTimelineId::new(self.tenant_id, self.timeline_id),
+                            remote_layer.file_name.clone(),
                        ))
                    }
                }
@@ -2879,6 +2893,8 @@ impl Timeline {
    /// obsolete.
    ///
    pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
+        let timer = self.metrics.garbage_collect_histo.start_timer();
+
        fail_point!("before-timeline-gc");

        let _layer_removal_cs = self.layer_removal_cs.lock().await;
@@ -2899,11 +2915,17 @@ impl Timeline {

        let new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);

-        self.gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
+        let res = self
+            .gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
            .instrument(
                info_span!("gc_timeline", timeline = %self.timeline_id, cutoff = %new_gc_cutoff),
            )
-            .await
+            .await?;
+
+        // only record successes
+        timer.stop_and_record();
+
+        Ok(res)
    }

    async fn gc_timeline(
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -46,6 +46,12 @@ PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = (
    "pageserver_remote_physical_size",
 )

+PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
+    "pageserver_storage_operations_seconds_global_count",
+    "pageserver_storage_operations_seconds_global_sum",
+    "pageserver_storage_operations_seconds_global_bucket",
+)
+
 PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
    "pageserver_current_logical_size",
    "pageserver_resident_physical_size",
@@ -61,9 +67,8 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
    "pageserver_smgr_query_seconds_bucket",
    "pageserver_smgr_query_seconds_count",
    "pageserver_smgr_query_seconds_sum",
-    "pageserver_storage_operations_seconds_bucket",
-    "pageserver_storage_operations_seconds_count",
-    "pageserver_storage_operations_seconds_sum",
+    "pageserver_storage_operations_seconds_count_total",
+    "pageserver_storage_operations_seconds_sum_total",
    "pageserver_wait_lsn_seconds_bucket",
    "pageserver_wait_lsn_seconds_count",
    "pageserver_wait_lsn_seconds_sum",
--- a/test_runner/performance/test_seqscans.py
+++ b/test_runner/performance/test_seqscans.py
@@ -35,11 +35,17 @@ from pytest_lazyfixture import lazy_fixture  # type: ignore
    ],
 )
 def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
-    rows = scale * rows
+    rows = scale * rows * 10

    with closing(env.pg.connect(options="-cstatement_timeout=0")) as conn:
        with conn.cursor() as cur:
+            cur.execute("set enable_seqscan_prefetch=on")
+            cur.execute("set max_parallel_workers_per_gather=0")
+
            cur.execute("drop table if exists t;")
+
+
+
            cur.execute("create table t (i integer);")
            cur.execute(f"insert into t values (generate_series(1,{rows}));")

@@ -62,4 +68,11 @@ def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: in

            with env.record_duration("run"):
                for i in range(iters):
-                    cur.execute("select count(*) from t;")
+                    cur.execute("explain (analyze, prefetch) select count(*) from t;")
+                    result = cur.fetchall()
+                    prefetch_stats = result[1][0]
+
+                    import re
+                    regex = re.compile(r'([\S]+)=(\S+)')
+                    parsed = dict(regex.findall(prefetch_stats))
+                    print("FFFF", parsed)
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -9,6 +9,7 @@ from typing import List
 import pytest
 from fixtures.log_helper import log
 from fixtures.metrics import (
+    PAGESERVER_GLOBAL_METRICS,
    PAGESERVER_PER_TENANT_METRICS,
    PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
    parse_metrics,
@@ -161,6 +162,14 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
            f"process_start_time_seconds (UTC): {datetime.fromtimestamp(metrics.query_one('process_start_time_seconds').value)}"
        )

+    # Test (a subset of) pageserver global metrics
+    for metric in PAGESERVER_GLOBAL_METRICS:
+        ps_samples = ps_metrics.query_all(metric, {})
+        assert len(ps_samples) > 0
+        for sample in ps_samples:
+            labels = ",".join([f'{key}="{value}"' for key, value in sample.labels.items()])
+            log.info(f"{sample.name}{{{labels}}} {sample.value}")
+

@pytest.mark.parametrize(
    "remote_storage_kind",
Author	SHA1	Message	Date
Bojan Serafimov	b7803f8df7	Test prefetch miss	2023-01-30 16:24:51 -05:00
Vadim Kharitonov	ec0e641578	Create Release PR: review fixes	2023-01-30 16:15:22 +01:00
Lassi Pölönen	20b38acff0	Replace per timeline `pageserver_storage_operations_seconds` with a global one (#3409 ) Related to: https://github.com/neondatabase/neon/issues/2848 `pageserver_storage_operations_seconds` is the most expensive metric we have, as there are a lot of tenants/timelines and the histogram had 42 buckets. These are quite sparse too, so instead of having a histogram per timeline, create a new histogram `pageserver_storage_operations_seconds_global` without tenant and timeline dimensions and replace `pageserver_storage_operations_seconds` with sum and counter. Co-authored-by: Joonas Koivunen <joonas@neon.tech>	2023-01-30 17:10:29 +02:00
Kirill Bulatov	c61bc25ef9	Clean up NeedsDownload error (#3464 )	2023-01-30 16:08:23 +02:00
Rory de Zoete	7bb13569b3	Switch more jobs to small runner (#3483 ) As these jobs don't benefit from additional cores Co-authored-by: Rory de Zoete <rdezoete@RorysMacStudio.fritz.box>	2023-01-30 14:00:44 +01:00
Vadim Kharitonov	5fc233964a	Create release PR	2023-01-30 12:44:48 +01:00