mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 05:30:37 +00:00
Compare commits
10 Commits
test-proxy
...
erik/histo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47293e836c | ||
|
|
71dcd1286b | ||
|
|
abdd5d1531 | ||
|
|
b31b5b69ba | ||
|
|
909cca1e25 | ||
|
|
c2afddbcba | ||
|
|
697c67ea85 | ||
|
|
f2aa8c5db9 | ||
|
|
07ac1653c1 | ||
|
|
74855f4b8c |
@@ -1,5 +1,5 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use consumption_metrics::EventType;
|
||||
@@ -24,6 +24,12 @@ pub(super) enum Name {
|
||||
/// Timeline last_record_lsn, incremental
|
||||
#[serde(rename = "written_data_bytes_delta")]
|
||||
WrittenSizeDelta,
|
||||
/// Timeline's PITR cutoff LSN (now - pitr_interval). If PITR is disabled
|
||||
/// (pitr_interval = 0), this will equal WrittenSize.
|
||||
///
|
||||
/// Updated periodically during GC. Does not regress.
|
||||
#[serde(rename = "pitr_cutoff")]
|
||||
PitrCutoff,
|
||||
/// Timeline logical size
|
||||
#[serde(rename = "timeline_logical_size")]
|
||||
LogicalSize,
|
||||
@@ -157,6 +163,18 @@ impl MetricsKey {
|
||||
.incremental_values()
|
||||
}
|
||||
|
||||
/// [`GcCutoffs::time`] (i.e. `now` - `pitr_interval`)
|
||||
///
|
||||
/// [`GcCutoffs::time`]: crate::tenant::timeline::GcCutoffs::time
|
||||
const fn pitr_cutoff(tenant_id: TenantId, timeline_id: TimelineId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
timeline_id: Some(timeline_id),
|
||||
metric: Name::PitrCutoff,
|
||||
}
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// Exact [`Timeline::get_current_logical_size`].
|
||||
///
|
||||
/// [`Timeline::get_current_logical_size`]: crate::tenant::Timeline::get_current_logical_size
|
||||
@@ -335,6 +353,8 @@ struct TimelineSnapshot {
|
||||
loaded_at: (Lsn, SystemTime),
|
||||
last_record_lsn: Lsn,
|
||||
current_exact_logical_size: Option<u64>,
|
||||
/// The PITR cutoff LSN. None if PITR is disabled.
|
||||
pitr_cutoff: Option<Lsn>,
|
||||
}
|
||||
|
||||
impl TimelineSnapshot {
|
||||
@@ -354,6 +374,8 @@ impl TimelineSnapshot {
|
||||
} else {
|
||||
let loaded_at = t.loaded_at;
|
||||
let last_record_lsn = t.get_last_record_lsn();
|
||||
let pitr_cutoff = (t.get_pitr_interval() != Duration::ZERO)
|
||||
.then(|| t.gc_info.read().unwrap().cutoffs.time);
|
||||
|
||||
let current_exact_logical_size = {
|
||||
let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
|
||||
@@ -374,6 +396,7 @@ impl TimelineSnapshot {
|
||||
loaded_at,
|
||||
last_record_lsn,
|
||||
current_exact_logical_size,
|
||||
pitr_cutoff,
|
||||
}))
|
||||
}
|
||||
}
|
||||
@@ -441,6 +464,25 @@ impl TimelineSnapshot {
|
||||
});
|
||||
}
|
||||
|
||||
// Compute the PITR cutoff. We have to make sure this doesn't regress:
|
||||
// on restart, GC may not run for a while, and the PITR cutoff will be
|
||||
// reported as 0 (which would bill the user for the entire history from
|
||||
// 0 to last_record_lsn). We therefore clamp this to the cached value
|
||||
// (which persists across restarts).
|
||||
//
|
||||
// If pitr_interval is disabled (pitr_cutoff is None), the cutoff is the
|
||||
// last record LSN, by definition. We don't use the written_size
|
||||
// clamping from above, because our own caching should result in
|
||||
// equivalent clamping.
|
||||
let pitr_cutoff_key = MetricsKey::pitr_cutoff(tenant_id, timeline_id);
|
||||
let mut pitr_cutoff = self
|
||||
.pitr_cutoff
|
||||
.unwrap_or(self.last_record_lsn /* PITR disabled */);
|
||||
if let Some(prev) = cache.get(pitr_cutoff_key.key()) {
|
||||
pitr_cutoff = pitr_cutoff.max(Lsn(prev.value)); // don't regress
|
||||
}
|
||||
metrics.push(pitr_cutoff_key.at(now, pitr_cutoff.into()));
|
||||
|
||||
{
|
||||
let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);
|
||||
let current_or_previous = self
|
||||
|
||||
@@ -12,12 +12,15 @@ fn startup_collected_timeline_metrics_before_advancing() {
|
||||
let cache = HashMap::new();
|
||||
|
||||
let initdb_lsn = Lsn(0x10000);
|
||||
let pitr_cutoff = Lsn(0x11000);
|
||||
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
|
||||
let logical_size = 0x42000;
|
||||
|
||||
let snap = TimelineSnapshot {
|
||||
loaded_at: (disk_consistent_lsn, SystemTime::now()),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
current_exact_logical_size: Some(logical_size),
|
||||
pitr_cutoff: Some(pitr_cutoff),
|
||||
};
|
||||
|
||||
let now = DateTime::<Utc>::from(SystemTime::now());
|
||||
@@ -33,7 +36,8 @@ fn startup_collected_timeline_metrics_before_advancing() {
|
||||
0
|
||||
),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -49,7 +53,9 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
let before = DateTime::<Utc>::from(before);
|
||||
|
||||
let initdb_lsn = Lsn(0x10000);
|
||||
let pitr_cutoff = Lsn(0x11000);
|
||||
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
|
||||
let logical_size = 0x42000;
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
let cache = HashMap::from([MetricsKey::written_size(tenant_id, timeline_id)
|
||||
@@ -59,7 +65,8 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
let snap = TimelineSnapshot {
|
||||
loaded_at: (disk_consistent_lsn, init),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
current_exact_logical_size: Some(logical_size),
|
||||
pitr_cutoff: Some(pitr_cutoff),
|
||||
};
|
||||
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
@@ -69,7 +76,8 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -86,7 +94,9 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
let before = DateTime::<Utc>::from(before);
|
||||
|
||||
let initdb_lsn = Lsn(0x10000);
|
||||
let pitr_cutoff = Lsn(0x11000);
|
||||
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
|
||||
let logical_size = 0x42000;
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
let cache = HashMap::from([
|
||||
@@ -103,7 +113,8 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
let snap = TimelineSnapshot {
|
||||
loaded_at: (disk_consistent_lsn, init),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
current_exact_logical_size: Some(logical_size),
|
||||
pitr_cutoff: Some(pitr_cutoff),
|
||||
};
|
||||
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
@@ -113,13 +124,14 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
fn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_pitr_cutoff() {
|
||||
// it can happen that we lose the inmemorylayer but have previously sent metrics and we
|
||||
// should never go backwards
|
||||
|
||||
@@ -141,6 +153,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
loaded_at: (Lsn(50), at_restart),
|
||||
last_record_lsn: Lsn(50),
|
||||
current_exact_logical_size: None,
|
||||
pitr_cutoff: Some(Lsn(20)),
|
||||
};
|
||||
|
||||
let mut cache = HashMap::from([
|
||||
@@ -155,6 +168,9 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
999_999_999,
|
||||
)
|
||||
.to_kv_pair(),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id)
|
||||
.at(before_restart, 70)
|
||||
.to_kv_pair(),
|
||||
]);
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
@@ -169,6 +185,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
0
|
||||
),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 70),
|
||||
]
|
||||
);
|
||||
|
||||
@@ -183,6 +200,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(later, 70),
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -202,6 +220,7 @@ fn post_restart_current_exact_logical_size_uses_cached() {
|
||||
loaded_at: (Lsn(50), at_restart),
|
||||
last_record_lsn: Lsn(50),
|
||||
current_exact_logical_size: None,
|
||||
pitr_cutoff: None,
|
||||
};
|
||||
|
||||
let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id)
|
||||
@@ -286,16 +305,53 @@ fn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {
|
||||
times
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pitr_cutoff_none_yields_last_record_lsn() {
|
||||
let tenant_id = TenantId::generate();
|
||||
let timeline_id = TimelineId::generate();
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
let cache = HashMap::new();
|
||||
|
||||
let initdb_lsn = Lsn(0x10000);
|
||||
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
|
||||
|
||||
let snap = TimelineSnapshot {
|
||||
loaded_at: (disk_consistent_lsn, SystemTime::now()),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: None,
|
||||
pitr_cutoff: None,
|
||||
};
|
||||
|
||||
let now = DateTime::<Utc>::from(SystemTime::now());
|
||||
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
|
||||
assert_eq!(
|
||||
metrics,
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
|
||||
snap.loaded_at.1.into(),
|
||||
now,
|
||||
0
|
||||
),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) const fn metric_examples_old(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [RawMetric; 5] {
|
||||
) -> [RawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until_old_format(before, now, 0),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
|
||||
@@ -307,10 +363,11 @@ pub(crate) const fn metric_examples(
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [NewRawMetric; 5] {
|
||||
) -> [NewRawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at(now, 1),
|
||||
|
||||
@@ -513,6 +513,10 @@ mod tests {
|
||||
line!(),
|
||||
r#"{"type":"incremental","start_time":"2023-09-14T00:00:00.123456789Z","stop_time":"2023-09-15T00:00:00.123456789Z","metric":"written_data_bytes_delta","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"pitr_cutoff","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"timeline_logical_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000","timeline_id":"ffffffffffffffffffffffffffffffff"}"#,
|
||||
@@ -560,7 +564,7 @@ mod tests {
|
||||
assert_eq!(upgraded_samples, new_samples);
|
||||
}
|
||||
|
||||
fn metric_samples_old() -> [RawMetric; 5] {
|
||||
fn metric_samples_old() -> [RawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
@@ -572,7 +576,7 @@ mod tests {
|
||||
super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
|
||||
}
|
||||
|
||||
fn metric_samples() -> [NewRawMetric; 5] {
|
||||
fn metric_samples() -> [NewRawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
|
||||
@@ -2545,6 +2545,13 @@ impl Timeline {
|
||||
.unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)
|
||||
}
|
||||
|
||||
pub(crate) fn get_pitr_interval(&self) -> Duration {
|
||||
let tenant_conf = &self.tenant_conf.load().tenant_conf;
|
||||
tenant_conf
|
||||
.pitr_interval
|
||||
.unwrap_or(self.conf.default_tenant_conf.pitr_interval)
|
||||
}
|
||||
|
||||
fn get_compaction_period(&self) -> Duration {
|
||||
let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
|
||||
tenant_conf
|
||||
|
||||
@@ -508,6 +508,7 @@ PER_METRIC_VERIFIERS = {
|
||||
"remote_storage_size": CannotVerifyAnything,
|
||||
"written_size": WrittenDataVerifier,
|
||||
"written_data_bytes_delta": WrittenDataDeltaVerifier,
|
||||
"pitr_cutoff": CannotVerifyAnything,
|
||||
"timeline_logical_size": CannotVerifyAnything,
|
||||
"synthetic_storage_size": SyntheticSizeVerifier,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user