* Add test for pageserver metric cleanup once a tenant is detached.
* Remove tenant-specific timeline metrics on detach.
* Use definitions from timeline_metrics in page service.
* Move metrics to their own file from layered_repository/timeline.rs.
* TIMELINE_METRICS: define smgr metrics.
* Remove smgr cleanup from timeline_metrics; it doesn't seem to work as expected.
* VirtualFile: centralize metrics, except for evicted files, as there's no tenant id or timeline id.
* Use STORAGE_TIME from timeline_metrics in layered_repository.
* Remove timeline-less gc metrics for a tenant on detach.
* Rename timeline metrics -> metrics, as it's more generic.
* Don't create a TimelineMetrics instance for VirtualFile.
* Move the rest of the metric definitions to metrics.rs too.
* UUID -> ZTenantId.
* Use consistent style for dict.
* Use Repository's Drop trait for dropping STORAGE_TIME metrics.
* No need for Arc; TimelineMetrics is used in just one place. Due to that, we can also fall back to using ZTenantId and ZTimelineId to avoid an additional string allocation.
from collections import defaultdict
from typing import Dict, List

from prometheus_client.parser import text_string_to_metric_families
from prometheus_client.samples import Sample

class Metrics:
    """Prometheus samples parsed from a metrics exposition, grouped by metric name."""

    metrics: Dict[str, List[Sample]]
    name: str

    def __init__(self, name: str = ""):
        self.metrics = defaultdict(list)
        self.name = name

    def query_all(self, name: str, filter: Dict[str, str]) -> List[Sample]:
        """Return every sample of `name` whose labels include all of `filter`."""
        res = []
        for sample in self.metrics[name]:
            try:
                if all(sample.labels[k] == v for k, v in filter.items()):
                    res.append(sample)
            except KeyError:
                # A sample that lacks one of the filter labels doesn't match.
                pass
        return res

    def query_one(self, name: str, filter: Dict[str, str] = {}) -> Sample:
        """Return the single matching sample; assert that exactly one matches."""
        res = self.query_all(name, filter)
        assert len(res) == 1, f"expected single sample for {name} {filter}, found {res}"
        return res[0]

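# A minimal usage sketch of Metrics (this helper and its sample data are
# illustrative additions, not part of the original fixture):
def _example_metrics_query() -> None:
    m = Metrics(name="example")
    # Two series of the same metric, distinguished only by tenant_id.
    m.metrics["pageserver_last_record_lsn"].append(
        Sample("pageserver_last_record_lsn", {"tenant_id": "t1"}, 10.0))
    m.metrics["pageserver_last_record_lsn"].append(
        Sample("pageserver_last_record_lsn", {"tenant_id": "t2"}, 20.0))
    # An empty filter matches every sample of the metric.
    assert len(m.query_all("pageserver_last_record_lsn", {})) == 2
    # A label filter narrows the result to exactly one sample.
    assert m.query_one("pageserver_last_record_lsn", {"tenant_id": "t2"}).value == 20.0
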
def parse_metrics(text: str, name: str = "") -> Metrics:
    """Parse text in the Prometheus exposition format into a Metrics object."""
    metrics = Metrics(name)
    gen = text_string_to_metric_families(text)
    for family in gen:
        for sample in family.samples:
            metrics.metrics[sample.name].append(sample)

    return metrics

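# A hedged end-to-end sketch (the exposition line below is invented for
# illustration; a real pageserver /metrics response carries many more series):
def _example_parse_and_query() -> None:
    raw = 'pageserver_last_record_lsn{tenant_id="t1",timeline_id="tl1"} 42\n'
    metrics = parse_metrics(raw, name="pageserver")
    sample = metrics.query_one("pageserver_last_record_lsn", {"tenant_id": "t1"})
    assert sample.value == 42.0
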
# Per-tenant metrics that should disappear from the pageserver's exposition
# once the tenant is detached.
PAGESERVER_PER_TENANT_METRICS = [
    "pageserver_current_logical_size",
    "pageserver_current_physical_size",
    "pageserver_getpage_reconstruct_seconds_bucket",
    "pageserver_getpage_reconstruct_seconds_count",
    "pageserver_getpage_reconstruct_seconds_sum",
    "pageserver_io_operations_bytes_total",
    "pageserver_io_operations_seconds_bucket",
    "pageserver_io_operations_seconds_count",
    "pageserver_io_operations_seconds_sum",
    "pageserver_last_record_lsn",
    "pageserver_materialized_cache_hits_total",
    "pageserver_smgr_query_seconds_bucket",
    "pageserver_smgr_query_seconds_count",
    "pageserver_smgr_query_seconds_sum",
    "pageserver_storage_operations_seconds_bucket",
    "pageserver_storage_operations_seconds_count",
    "pageserver_storage_operations_seconds_sum",
    "pageserver_wait_lsn_seconds_bucket",
    "pageserver_wait_lsn_seconds_count",
    "pageserver_wait_lsn_seconds_sum",
]
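
# A hedged sketch of how a detach test might consume this list (this helper and
# its arguments are illustrative; the repository's actual detach test is not
# shown here): after detaching a tenant, no per-tenant metric should still
# carry samples labeled with that tenant's id.
def _example_assert_tenant_metrics_removed(metrics_text: str, tenant_id: str) -> None:
    metrics = parse_metrics(metrics_text, name="pageserver")
    for metric_name in PAGESERVER_PER_TENANT_METRICS:
        samples = metrics.query_all(metric_name, {"tenant_id": tenant_id})
        assert samples == [], f"{metric_name} still has samples for tenant {tenant_id}"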