mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-31 20:10:38 +00:00
Add counters for commands processed through the libpq page service API (#8089)
I was looking for metrics on how many computes are still using protocol versions 1 and 2. This adds counters for those as the "pagestream" and "pagestream_v2" commands, and also for all the other commands. The new metrics are global for the whole pageserver instance rather than per-tenant, so the additional metrics bloat should be fairly small.
This commit is contained in:
committed by
GitHub
parent
24ce73ffaf
commit
5b871802fd
@@ -1445,6 +1445,46 @@ pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
|
||||
pub(crate) enum ComputeCommandKind {
|
||||
PageStreamV2,
|
||||
PageStream,
|
||||
Basebackup,
|
||||
GetLastRecordRlsn,
|
||||
Fullbackup,
|
||||
ImportBasebackup,
|
||||
ImportWal,
|
||||
LeaseLsn,
|
||||
Show,
|
||||
}
|
||||
|
||||
pub(crate) struct ComputeCommandCounters {
|
||||
map: EnumMap<ComputeCommandKind, IntCounter>,
|
||||
}
|
||||
|
||||
pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
|
||||
let inner = register_int_counter_vec!(
|
||||
"pageserver_compute_commands",
|
||||
"Number of compute -> pageserver commands processed",
|
||||
&["command"]
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
|
||||
ComputeCommandCounters {
|
||||
map: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
|
||||
let command_str: &'static str = command.into();
|
||||
inner.with_label_values(&[command_str])
|
||||
})),
|
||||
}
|
||||
});
|
||||
|
||||
impl ComputeCommandCounters {
|
||||
pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
|
||||
&self.map[command]
|
||||
}
|
||||
}
|
||||
|
||||
// remote storage metrics
|
||||
|
||||
static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
|
||||
@@ -2949,4 +2989,5 @@ pub fn preinitialize_metrics() {
|
||||
Lazy::force(&RECONSTRUCT_TIME);
|
||||
Lazy::force(&tenant_throttling::TIMELINE_GET);
|
||||
Lazy::force(&BASEBACKUP_QUERY_TIME);
|
||||
Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ use crate::basebackup::BasebackupError;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::import_datadir::import_wal_from_tar;
|
||||
use crate::metrics;
|
||||
use crate::metrics::LIVE_CONNECTIONS_COUNT;
|
||||
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS_COUNT};
|
||||
use crate::pgdatadir_mapping::Version;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
|
||||
@@ -1554,6 +1554,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::PageStreamV2)
|
||||
.inc();
|
||||
|
||||
self.handle_pagerequests(
|
||||
pgb,
|
||||
tenant_id,
|
||||
@@ -1579,6 +1583,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::PageStream)
|
||||
.inc();
|
||||
|
||||
self.handle_pagerequests(
|
||||
pgb,
|
||||
tenant_id,
|
||||
@@ -1605,6 +1613,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::Basebackup)
|
||||
.inc();
|
||||
|
||||
let lsn = if let Some(lsn_str) = params.get(2) {
|
||||
Some(
|
||||
Lsn::from_str(lsn_str)
|
||||
@@ -1662,6 +1674,11 @@ where
|
||||
.record("timeline_id", field::display(timeline_id));
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::GetLastRecordRlsn)
|
||||
.inc();
|
||||
|
||||
async {
|
||||
let timeline = self
|
||||
.get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
@@ -1723,6 +1740,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::Fullbackup)
|
||||
.inc();
|
||||
|
||||
// Check that the timeline exists
|
||||
self.handle_basebackup_request(
|
||||
pgb,
|
||||
@@ -1771,6 +1792,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::ImportBasebackup)
|
||||
.inc();
|
||||
|
||||
match self
|
||||
.handle_import_basebackup(
|
||||
pgb,
|
||||
@@ -1818,6 +1843,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::ImportWal)
|
||||
.inc();
|
||||
|
||||
match self
|
||||
.handle_import_wal(pgb, tenant_id, timeline_id, start_lsn, end_lsn, ctx)
|
||||
.await
|
||||
@@ -1855,6 +1884,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::LeaseLsn)
|
||||
.inc();
|
||||
|
||||
// The caller is responsible for providing correct lsn.
|
||||
let lsn = Lsn::from_str(params[2])
|
||||
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?;
|
||||
@@ -1886,6 +1919,10 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::Show)
|
||||
.inc();
|
||||
|
||||
let tenant = self
|
||||
.get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
|
||||
Reference in New Issue
Block a user