Rewrite as pitr_cutoff, don't regress
@@ -24,9 +24,12 @@ pub(super) enum Name {
     /// Timeline last_record_lsn, incremental
     #[serde(rename = "written_data_bytes_delta")]
     WrittenSizeDelta,
-    /// Timeline last_record_lsn - gc_cutoffs.time (i.e. pitr_interval)
-    #[serde(rename = "pitr_history_size")]
-    PitrHistorySize,
+    /// Timeline's PITR cutoff LSN (now - pitr_interval). If PITR is disabled
+    /// (pitr_interval = 0), this will equal WrittenSize.
+    ///
+    /// Updated periodically during GC. Does not regress.
+    #[serde(rename = "pitr_cutoff")]
+    PitrCutoff,
     /// Timeline logical size
     #[serde(rename = "timeline_logical_size")]
     LogicalSize,
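Note: with pitr_history_size replaced by the absolute pitr_cutoff metric, the old history-size value can still be derived downstream as written_size - pitr_cutoff; per the doc comment above, the two absolute metrics are equal when PITR is disabled, so the difference is then 0. A minimal, hypothetical consumer-side helper (not part of this diff; the function name and plain u64 LSN values are illustrative):

// Hypothetical downstream helper: recover the old "pitr_history_size" value from
// the two absolute metrics. Plain u64s stand in for the pageserver's Lsn type.
fn derived_pitr_history_size(written_size_lsn: u64, pitr_cutoff_lsn: u64) -> u64 {
    // With PITR disabled, pitr_cutoff equals written_size, so the history size is 0.
    written_size_lsn.saturating_sub(pitr_cutoff_lsn)
}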
@@ -163,18 +166,14 @@ impl MetricsKey {
         .incremental_values()
     }
 
-    /// [`Timeline::get_last_record_lsn`] - [`GcCutoffs::time`] (i.e. `pitr_interval`).
+    /// [`GcCutoffs::time`] (i.e. `now` - `pitr_interval`)
     ///
-    /// [`Timeline::get_last_record_lsn`]: crate::tenant::Timeline::get_last_record_lsn
     /// [`GcCutoffs::time`]: crate::tenant::timeline::GcCutoffs::time
-    const fn pitr_history_size(
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-    ) -> AbsoluteValueFactory {
+    const fn pitr_cutoff(tenant_id: TenantId, timeline_id: TimelineId) -> AbsoluteValueFactory {
         MetricsKey {
             tenant_id,
             timeline_id: Some(timeline_id),
-            metric: Name::PitrHistorySize,
+            metric: Name::PitrCutoff,
         }
         .absolute_values()
     }
@@ -501,17 +500,24 @@ impl TimelineSnapshot {
             });
         }
 
-        // Compute the PITR history size.
+        // Compute the PITR cutoff. We have to make sure this doesn't regress:
+        // on restart, GC may not run for a while, and the PITR cutoff will be
+        // reported as 0 (which would bill the user for the entire history from
+        // 0 to last_record_lsn). We therefore clamp this to the cached value
+        // (which persists across restarts).
         //
-        // TODO: verify that the GC cutoffs don't severely regress, e.g. to 0 such that we bill the
-        // entire history. Also verify that it's okay for this to regress on restart, unlike e.g.
-        // written_size above.
-        let pitr_history_size_key = MetricsKey::pitr_history_size(tenant_id, timeline_id);
-        let pitr_history_size = self
+        // If pitr_interval is disabled (pitr_cutoff is None), the cutoff is the
+        // last record LSN, by definition. We don't use the written_size
+        // clamping from above, because our own caching should result in
+        // equivalent clamping.
+        let pitr_cutoff_key = MetricsKey::pitr_cutoff(tenant_id, timeline_id);
+        let mut pitr_cutoff = self
             .pitr_cutoff
-            .map(|pitr| self.last_record_lsn.saturating_sub(pitr))
-            .unwrap_or_default();
-        metrics.push(pitr_history_size_key.at(now, pitr_history_size.into()));
+            .unwrap_or(self.last_record_lsn /* PITR disabled */);
+        if let Some(prev) = cache.get(pitr_cutoff_key.key()) {
+            pitr_cutoff = pitr_cutoff.max(Lsn(prev.value)); // don't regress
+        }
+        metrics.push(pitr_cutoff_key.at(now, pitr_cutoff.into()));
 
         {
             let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);
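The non-regression logic above, in isolation: a minimal sketch assuming plain u64 LSNs instead of the pageserver's Lsn type, with cached_pitr_cutoff standing in for the cache.get(...) lookup (the names here are illustrative, not the crate's API):

// Minimal sketch of the clamp in the hunk above (illustrative names, u64 LSNs).
fn effective_pitr_cutoff(
    gc_pitr_cutoff: Option<u64>,     // None => PITR disabled
    last_record_lsn: u64,
    cached_pitr_cutoff: Option<u64>, // value previously reported and cached
) -> u64 {
    // With PITR disabled, the cutoff is the last record LSN by definition.
    let mut cutoff = gc_pitr_cutoff.unwrap_or(last_record_lsn);
    // Right after a restart GC may not have run yet and the fresh cutoff can be 0;
    // clamping against the cached value keeps the reported metric from regressing.
    if let Some(prev) = cached_pitr_cutoff {
        cutoff = cutoff.max(prev);
    }
    cutoff
}

In the test hunks below, the snapshot carries pitr_cutoff: Some(Lsn(20)) while the cache holds 70 from before the restart, so 70 is what gets reported.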
@@ -36,8 +36,7 @@ fn startup_collected_timeline_metrics_before_advancing() {
                 0
             ),
             MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::pitr_history_size(tenant_id, timeline_id)
-                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
             MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
         ]
     );
@@ -77,8 +76,7 @@ fn startup_collected_timeline_metrics_second_round() {
         &[
             MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
             MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::pitr_history_size(tenant_id, timeline_id)
-                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
             MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
         ]
     );
@@ -126,15 +124,14 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
         &[
             MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),
             MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
-            MetricsKey::pitr_history_size(tenant_id, timeline_id)
-                .at(now, disk_consistent_lsn.0 - pitr_cutoff.0),
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, pitr_cutoff.0),
             MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, logical_size)
         ]
     );
 }
 
 #[test]
-fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
+fn post_restart_written_sizes_with_rolled_back_last_record_lsn_and_pitr_cutoff() {
     // it can happen that we lose the inmemorylayer but have previously sent metrics and we
     // should never go backwards
 
@@ -156,7 +153,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
         loaded_at: (Lsn(50), at_restart),
         last_record_lsn: Lsn(50),
         current_exact_logical_size: None,
-        pitr_cutoff: Some(Lsn(0)),
+        pitr_cutoff: Some(Lsn(20)),
     };
 
     let mut cache = HashMap::from([
@@ -171,6 +168,9 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
             999_999_999,
         )
         .to_kv_pair(),
+        MetricsKey::pitr_cutoff(tenant_id, timeline_id)
+            .at(before_restart, 70)
+            .to_kv_pair(),
     ]);
 
     let mut metrics = Vec::new();
@@ -185,7 +185,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
                 0
             ),
             MetricsKey::written_size(tenant_id, timeline_id).at(now, 100),
-            MetricsKey::pitr_history_size(tenant_id, timeline_id).at(now, 50), // does regress
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 70),
         ]
     );
 
@@ -200,7 +200,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
         &[
             MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(now, later, 0),
             MetricsKey::written_size(tenant_id, timeline_id).at(later, 100),
-            MetricsKey::pitr_history_size(tenant_id, timeline_id).at(later, 50),
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(later, 70),
         ]
     );
 }
@@ -309,16 +309,53 @@ fn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {
     times
 }
 
+#[test]
+fn pitr_cutoff_none_yields_last_record_lsn() {
+    let tenant_id = TenantId::generate();
+    let timeline_id = TimelineId::generate();
+
+    let mut metrics = Vec::new();
+    let cache = HashMap::new();
+
+    let initdb_lsn = Lsn(0x10000);
+    let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
+
+    let snap = TimelineSnapshot {
+        loaded_at: (disk_consistent_lsn, SystemTime::now()),
+        last_record_lsn: disk_consistent_lsn,
+        current_exact_logical_size: None,
+        pitr_cutoff: None,
+    };
+
+    let now = DateTime::<Utc>::from(SystemTime::now());
+
+    snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
+
+    assert_eq!(
+        metrics,
+        &[
+            MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
+                snap.loaded_at.1.into(),
+                now,
+                0
+            ),
+            MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
+            MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
+        ]
+    );
+}
+
 pub(crate) const fn metric_examples_old(
     tenant_id: TenantId,
     timeline_id: TimelineId,
     now: DateTime<Utc>,
     before: DateTime<Utc>,
-) -> [RawMetric; 6] {
+) -> [RawMetric; 7] {
     [
         MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
         MetricsKey::written_size_delta(tenant_id, timeline_id)
             .from_until_old_format(before, now, 0),
+        MetricsKey::pitr_cutoff(tenant_id, timeline_id).at_old_format(now, 0),
         MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
         MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
         MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
@@ -335,7 +372,7 @@ pub(crate) const fn metric_examples(
     [
         MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
         MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
-        MetricsKey::pitr_history_size(tenant_id, timeline_id).at(now, 0),
+        MetricsKey::pitr_cutoff(tenant_id, timeline_id).at(now, 0),
         MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
         MetricsKey::remote_storage_size(tenant_id).at(now, 0),
         MetricsKey::resident_size(tenant_id).at(now, 0),
@@ -568,7 +568,7 @@ mod tests {
         assert_eq!(upgraded_samples, new_samples);
     }
 
-    fn metric_samples_old() -> [RawMetric; 6] {
+    fn metric_samples_old() -> [RawMetric; 7] {
         let tenant_id = TenantId::from_array([0; 16]);
         let timeline_id = TimelineId::from_array([0xff; 16]);
 