metrics: smgr query time: add a pre-aggregated histogram (#5064)

When doing global queries in VictoriaMetrics, the per-timeline
histograms make us run into cardinality limits.

We don't want to give them up just yet because we don't
have an alternative for drilling down on timeline-specific
performance issues.

So, add a pre-aggregated histogram and add observations to it
whenever we add observations to the per-timeline histogram.

While we're at it, switch to using a strummed enum for the operation
type names.
This commit is contained in:
Christian Schwarz
2023-08-22 19:08:31 +02:00
committed by GitHub
parent 933a869f00
commit 8cd20485f8
3 changed files with 168 additions and 52 deletions

View File

@@ -6,7 +6,7 @@ use metrics::{
HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
};
use once_cell::sync::Lazy;
use strum::VariantNames;
use strum::{EnumCount, IntoEnumIterator, VariantNames};
use strum_macros::{EnumVariantNames, IntoStaticStr};
use utils::id::{TenantId, TimelineId};
@@ -570,23 +570,160 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});
const SMGR_QUERY_TIME_OPERATIONS: &[&str] = &[
"get_rel_exists",
"get_rel_size",
"get_page_at_lsn",
"get_db_size",
];
#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,
per_tenant_timeline: Histogram,
}
pub static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
impl GlobalAndPerTimelineHistogram {
fn observe(&self, value: f64) {
self.global.observe(value);
self.per_tenant_timeline.observe(value);
}
}
struct GlobalAndPerTimelineHistogramTimer<'a> {
h: &'a GlobalAndPerTimelineHistogram,
start: std::time::Instant,
}
impl<'a> Drop for GlobalAndPerTimelineHistogramTimer<'a> {
fn drop(&mut self) {
let elapsed = self.start.elapsed();
self.h.observe(elapsed.as_secs_f64());
}
}
#[derive(
Debug,
Clone,
Copy,
IntoStaticStr,
strum_macros::EnumCount,
strum_macros::EnumIter,
strum_macros::FromRepr,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
GetRelExists,
GetRelSize,
GetPageAtLsn,
GetDbSize,
}
#[derive(Debug)]
pub struct SmgrQueryTimePerTimeline {
metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
}
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_smgr_query_seconds",
"Time spent on smgr query handling",
"Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
&["smgr_query_type", "tenant_id", "timeline_id"],
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric")
});
static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_smgr_query_seconds_global",
"Time spent on smgr query handling, aggregated by query type.",
&["smgr_query_type"],
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric")
});
impl SmgrQueryTimePerTimeline {
pub(crate) fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let metrics = std::array::from_fn(|i| {
let op = SmgrQueryType::from_repr(i).unwrap();
let global = SMGR_QUERY_TIME_GLOBAL
.get_metric_with_label_values(&[op.into()])
.unwrap();
let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
.get_metric_with_label_values(&[op.into(), &tenant_id, &timeline_id])
.unwrap();
GlobalAndPerTimelineHistogram {
global,
per_tenant_timeline,
}
});
Self { metrics }
}
pub(crate) fn start_timer(&self, op: SmgrQueryType) -> impl Drop + '_ {
let metric = &self.metrics[op as usize];
GlobalAndPerTimelineHistogramTimer {
h: metric,
start: std::time::Instant::now(),
}
}
}
#[cfg(test)]
mod smgr_query_time_tests {
use strum::IntoEnumIterator;
use utils::id::{TenantId, TimelineId};
// Regression test, we used hard-coded string constants before using an enum.
#[test]
fn op_label_name() {
use super::SmgrQueryType::*;
let expect: [(super::SmgrQueryType, &'static str); 4] = [
(GetRelExists, "get_rel_exists"),
(GetRelSize, "get_rel_size"),
(GetPageAtLsn, "get_page_at_lsn"),
(GetDbSize, "get_db_size"),
];
for (op, expect) in expect {
let actual: &'static str = op.into();
assert_eq!(actual, expect);
}
}
#[test]
fn basic() {
let ops: Vec<_> = super::SmgrQueryType::iter().collect();
for op in &ops {
let tenant_id = TenantId::generate();
let timeline_id = TimelineId::generate();
let metrics = super::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
let get_counts = || {
let global: u64 = ops
.iter()
.map(|op| metrics.metrics[*op as usize].global.get_sample_count())
.sum();
let per_tenant_timeline: u64 = ops
.iter()
.map(|op| {
metrics.metrics[*op as usize]
.per_tenant_timeline
.get_sample_count()
})
.sum();
(global, per_tenant_timeline)
};
let (pre_global, pre_per_tenant_timeline) = get_counts();
assert_eq!(pre_per_tenant_timeline, 0);
let timer = metrics.start_timer(*op);
drop(timer);
let (post_global, post_per_tenant_timeline) = get_counts();
assert_eq!(post_per_tenant_timeline, 1);
assert!(post_global > pre_global);
}
}
}
// keep in sync with control plane Go code so that we can validate
// compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
@@ -1045,6 +1182,12 @@ impl Drop for TimelineMetrics {
.write()
.unwrap()
.remove(tenant_id, timeline_id);
// The following metrics are born outside of the TimelineMetrics lifecycle but still
// removed at the end of it. The idea is to have the metrics outlive the
// entity during which they're observed, e.g., the smgr metrics shall
// outlive an individual smgr connection, but not the timeline.
for op in StorageTimeOperation::VARIANTS {
let _ =
STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
@@ -1056,8 +1199,12 @@ impl Drop for TimelineMetrics {
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, timeline_id]);
}
for op in SMGR_QUERY_TIME_OPERATIONS {
let _ = SMGR_QUERY_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
for op in SmgrQueryType::iter() {
let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
op.into(),
tenant_id,
timeline_id,
]);
}
}
}

View File

@@ -50,7 +50,8 @@ use crate::basebackup;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant;
@@ -306,39 +307,6 @@ async fn page_service_conn_main(
}
}
struct PageRequestMetrics {
get_rel_exists: metrics::Histogram,
get_rel_size: metrics::Histogram,
get_page_at_lsn: metrics::Histogram,
get_db_size: metrics::Histogram,
}
impl PageRequestMetrics {
fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let get_rel_exists =
SMGR_QUERY_TIME.with_label_values(&["get_rel_exists", &tenant_id, &timeline_id]);
let get_rel_size =
SMGR_QUERY_TIME.with_label_values(&["get_rel_size", &tenant_id, &timeline_id]);
let get_page_at_lsn =
SMGR_QUERY_TIME.with_label_values(&["get_page_at_lsn", &tenant_id, &timeline_id]);
let get_db_size =
SMGR_QUERY_TIME.with_label_values(&["get_db_size", &tenant_id, &timeline_id]);
Self {
get_rel_exists,
get_rel_size,
get_page_at_lsn,
get_db_size,
}
}
}
struct PageServerHandler {
_conf: &'static PageServerConf,
broker_client: storage_broker::BrokerClientChannel,
@@ -406,7 +374,7 @@ impl PageServerHandler {
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
pgb.flush().await?;
let metrics = PageRequestMetrics::new(&tenant_id, &timeline_id);
let metrics = metrics::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
loop {
let msg = tokio::select! {
@@ -446,21 +414,21 @@ impl PageServerHandler {
let response = match neon_fe_msg {
PagestreamFeMessage::Exists(req) => {
let _timer = metrics.get_rel_exists.start_timer();
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelExists);
self.handle_get_rel_exists_request(&timeline, &req, &ctx)
.await
}
PagestreamFeMessage::Nblocks(req) => {
let _timer = metrics.get_rel_size.start_timer();
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelSize);
self.handle_get_nblocks_request(&timeline, &req, &ctx).await
}
PagestreamFeMessage::GetPage(req) => {
let _timer = metrics.get_page_at_lsn.start_timer();
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetPageAtLsn);
self.handle_get_page_at_lsn_request(&timeline, &req, &ctx)
.await
}
PagestreamFeMessage::DbSize(req) => {
let _timer = metrics.get_db_size.start_timer();
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetDbSize);
self.handle_db_size_request(&timeline, &req, &ctx).await
}
};
@@ -984,8 +952,8 @@ where
false
};
metrics::metric_vec_duration::observe_async_block_duration_by_result(
&*crate::metrics::BASEBACKUP_QUERY_TIME,
::metrics::metric_vec_duration::observe_async_block_duration_by_result(
&*metrics::BASEBACKUP_QUERY_TIME,
async move {
self.handle_basebackup_request(
pgb,

View File

@@ -70,6 +70,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
"pageserver_getpage_reconstruct_seconds_count",
"pageserver_getpage_reconstruct_seconds_sum",
*[f"pageserver_basebackup_query_seconds_{x}" for x in ["bucket", "count", "sum"]],
*histogram("pageserver_smgr_query_seconds_global"),
*histogram("pageserver_read_num_fs_layers"),
*histogram("pageserver_getpage_get_reconstruct_data_seconds"),
*histogram("pageserver_wait_lsn_seconds"),