Files
neon/test_runner/fixtures/metrics.py
Lassi Pölönen f081419e68 Cleanup tenant specific metrics once a tenant is detached. (#2328)
* Add test for pageserver metric cleanup once a tenant is detached.

* Remove tenant specific timeline metrics on detach.

* Use definitions from timeline_metrics in page service.

* Move metrics to own file from layered_repository/timeline.rs

* TIMELINE_METRICS: define smgr metrics

* REMOVE SMGR cleanup from timeline_metrics. Doesn't seem to work as
expected.

* Virtual file centralized metrics, except for evicted file as there's no
tenant id or timeline id.

* Use STORAGE_TIME from timeline_metrics in layered_repository.

* Remove timelineless gc metrics for tenant on detach.

* Rename timeline metrics -> metrics as it's more generic.

* Don't create a TimelineMetrics instance for VirtualFile

* Move the rest of the metric definitions to metrics.rs too.

* UUID -> ZTenantId

* Use consistent style for dict.

* Use Repository's Drop trait for dropping STORAGE_TIME metrics.

* No need for Arc; TimelineMetrics is used in just one place. Due to that,
we can fall back to using ZTenantId and ZTimelineId too to avoid additional
string allocation.
2022-09-06 11:30:20 +03:00

64 lines
2.1 KiB
Python

from collections import defaultdict
from typing import Dict, List, Optional

from prometheus_client.parser import text_string_to_metric_families
from prometheus_client.samples import Sample
class Metrics:
    """Collection of parsed metric samples, grouped by sample name.

    ``metrics`` maps a sample name to the list of samples recorded under
    that name.  ``name`` is a free-form label for the collection and
    defaults to the empty string.
    """

    metrics: Dict[str, List[Sample]]
    name: str

    def __init__(self, name: str = ""):
        # defaultdict(list) lets producers append to a sample name without
        # having to create the list first.
        self.metrics = defaultdict(list)
        self.name = name

    def query_all(self, name: str, filter: Dict[str, str]) -> List[Sample]:
        """Return all samples called *name* whose labels match *filter*.

        A sample matches when every key of *filter* is present in the
        sample's labels with an equal value.  Samples that lack one of the
        requested labels are skipped.  An empty *filter* matches every
        sample stored under *name*; an unknown *name* yields an empty list.
        """
        # Use .get() rather than indexing: indexing the defaultdict would
        # insert an empty entry for an unknown name, so a pure read would
        # silently change the contents of ``self.metrics``.
        candidates = self.metrics.get(name, ())
        return [
            sample
            for sample in candidates
            if all(
                key in sample.labels and sample.labels[key] == value
                for key, value in filter.items()
            )
        ]

    def query_one(self, name: str, filter: Optional[Dict[str, str]] = None) -> Sample:
        """Return the single sample called *name* that matches *filter*.

        *filter* defaults to "no label constraints".  Raises
        ``AssertionError`` when zero or more than one sample matches.
        """
        # A None default replaces the former shared ``{}`` default object;
        # normalise it here so the assertion message keeps showing ``{}``.
        if filter is None:
            filter = {}
        res = self.query_all(name, filter)
        assert len(res) == 1, f"expected single sample for {name} {filter}, found {res}"
        return res[0]
def parse_metrics(text: str, name: str = ""):
    """Parse metrics text into a ``Metrics`` collection labelled *name*.

    *text* is handed to ``text_string_to_metric_families``; every sample of
    every resulting family is stored under its own sample name, in the
    order the parser yields them.  Returns the populated ``Metrics``.
    """
    parsed = Metrics(name)
    samples_by_name = parsed.metrics
    for metric_family in text_string_to_metric_families(text):
        for entry in metric_family.samples:
            samples_by_name[entry.name].append(entry)
    return parsed
# Names of the pageserver metric series treated as per-tenant.
# NOTE(review): presumably consumed by the tenant-detach metric cleanup
# test; confirm against callers.  The list is kept in alphabetical order
# and every name is spelled out in full so it stays greppable.
PAGESERVER_PER_TENANT_METRICS = (
    # Size series.
    [
        "pageserver_current_logical_size",
        "pageserver_current_physical_size",
    ]
    # getpage reconstruct timings (_bucket/_count/_sum series).
    + [
        "pageserver_getpage_reconstruct_seconds_bucket",
        "pageserver_getpage_reconstruct_seconds_count",
        "pageserver_getpage_reconstruct_seconds_sum",
    ]
    # IO operations: byte counter plus timings.
    + [
        "pageserver_io_operations_bytes_total",
        "pageserver_io_operations_seconds_bucket",
        "pageserver_io_operations_seconds_count",
        "pageserver_io_operations_seconds_sum",
    ]
    # Single-series metrics.
    + [
        "pageserver_last_record_lsn",
        "pageserver_materialized_cache_hits_total",
    ]
    # smgr query timings.
    + [
        "pageserver_smgr_query_seconds_bucket",
        "pageserver_smgr_query_seconds_count",
        "pageserver_smgr_query_seconds_sum",
    ]
    # Storage operation timings.
    + [
        "pageserver_storage_operations_seconds_bucket",
        "pageserver_storage_operations_seconds_count",
        "pageserver_storage_operations_seconds_sum",
    ]
    # wait_lsn timings.
    + [
        "pageserver_wait_lsn_seconds_bucket",
        "pageserver_wait_lsn_seconds_count",
        "pageserver_wait_lsn_seconds_sum",
    ]
)