mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 03:30:36 +00:00
feat(pageserver): support synthetic size calculation for invisible branches (#11335)
## Problem ref https://github.com/neondatabase/neon/issues/11279 Imagine we have a branch with 3 snapshots A, B, and C: ``` base---+---+---+---main \-A \-B \-C base=100G, base-A=1G, A-B=1G, B-C=1G, C-main=1G ``` at this point, the synthetic size should be 100+1+1+1+1=104G. after the deletion, the structure looks like: ``` base---+---+---+ \-A \-B \-C ``` If we simply assume main never exists, the size will be calculated as size(A) + size(B) + size(C)=300GB, which obviously is not what the user would expect. The correct way to do this is to assume part of main still exists, that is to say, set C-main=1G: ``` base---+---+---+main \-A \-B \-C ``` And we will get the correct synthetic size of 100G+1+1+1=103G. ## Summary of changes * Do not generate gc cutoff point for invisible branches. * Use the same LSN as the last branchpoint for branch end. * Remove test_api_handler for mark_invisible. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -3774,7 +3774,7 @@ pub fn make_router(
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/mark_invisible",
|
||||
|r| testing_api_handler("mark timeline invisible", r, timeline_mark_invisible_handler),
|
||||
|r| api_handler( r, timeline_mark_invisible_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
|
||||
|
||||
@@ -11526,4 +11526,255 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_synthetic_size_calculation_with_invisible_branches() -> anyhow::Result<()> {
|
||||
use pageserver_api::models::TimelineVisibilityState;
|
||||
|
||||
use crate::tenant::size::gather_inputs;
|
||||
|
||||
let tenant_conf = pageserver_api::models::TenantConfig {
|
||||
// Ensure that we don't compute gc_cutoffs (which needs reading the layer files)
|
||||
pitr_interval: Some(Duration::ZERO),
|
||||
..Default::default()
|
||||
};
|
||||
let harness = TenantHarness::create_custom(
|
||||
"test_synthetic_size_calculation_with_invisible_branches",
|
||||
tenant_conf,
|
||||
TenantId::generate(),
|
||||
ShardIdentity::unsharded(),
|
||||
Generation::new(0xdeadbeef),
|
||||
)
|
||||
.await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let main_tline = tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x10),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x100),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let snapshot1 = TimelineId::from_array(hex!("11223344556677881122334455667790"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot1,
|
||||
Some(Lsn(0x20)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let snapshot2 = TimelineId::from_array(hex!("11223344556677881122334455667791"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot2,
|
||||
Some(Lsn(0x30)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let snapshot3 = TimelineId::from_array(hex!("11223344556677881122334455667792"));
|
||||
tenant
|
||||
.branch_timeline_test_with_layers(
|
||||
&main_tline,
|
||||
snapshot3,
|
||||
Some(Lsn(0x40)),
|
||||
&ctx,
|
||||
vec![],
|
||||
vec![],
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let limit = Arc::new(Semaphore::new(1));
|
||||
let max_retention_period = None;
|
||||
let mut logical_size_cache = HashMap::new();
|
||||
let cause = LogicalSizeCalculationCause::EvictionTaskImitation;
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let inputs = gather_inputs(
|
||||
&tenant,
|
||||
&limit,
|
||||
max_retention_period,
|
||||
&mut logical_size_cache,
|
||||
cause,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"gather_inputs",
|
||||
tenant_id = "unknown",
|
||||
shard_id = "unknown",
|
||||
))
|
||||
.await?;
|
||||
use crate::tenant::size::{LsnKind, ModelInputs, SegmentMeta};
|
||||
use LsnKind::*;
|
||||
use tenant_size_model::Segment;
|
||||
let ModelInputs { mut segments, .. } = inputs;
|
||||
segments.retain(|s| s.timeline_id == TIMELINE_ID);
|
||||
for segment in segments.iter_mut() {
|
||||
segment.segment.parent = None; // We don't care about the parent for the test
|
||||
segment.segment.size = None; // We don't care about the size for the test
|
||||
}
|
||||
assert_eq!(
|
||||
segments,
|
||||
[
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x10,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchStart,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x20,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x30,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x100,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: GcCutOff,
|
||||
}, // we need to retain everything above the last branch point
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x100,
|
||||
size: None,
|
||||
needed: true,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchEnd,
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
main_tline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_timeline_invisible_state(
|
||||
TimelineVisibilityState::Invisible,
|
||||
)?;
|
||||
main_tline.remote_client.wait_completion().await?;
|
||||
let inputs = gather_inputs(
|
||||
&tenant,
|
||||
&limit,
|
||||
max_retention_period,
|
||||
&mut logical_size_cache,
|
||||
cause,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"gather_inputs",
|
||||
tenant_id = "unknown",
|
||||
shard_id = "unknown",
|
||||
))
|
||||
.await?;
|
||||
let ModelInputs { mut segments, .. } = inputs;
|
||||
segments.retain(|s| s.timeline_id == TIMELINE_ID);
|
||||
for segment in segments.iter_mut() {
|
||||
segment.segment.parent = None; // We don't care about the parent for the test
|
||||
segment.segment.size = None; // We don't care about the size for the test
|
||||
}
|
||||
assert_eq!(
|
||||
segments,
|
||||
[
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x10,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchStart,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x20,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x30,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40,
|
||||
size: None,
|
||||
needed: false,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchPoint,
|
||||
},
|
||||
SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: None,
|
||||
lsn: 0x40, // Branch end LSN == last branch point LSN
|
||||
size: None,
|
||||
needed: true,
|
||||
},
|
||||
timeline_id: TIMELINE_ID,
|
||||
kind: BranchEnd,
|
||||
},
|
||||
]
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ pub struct ModelInputs {
|
||||
}
|
||||
|
||||
/// A [`Segment`], with some extra information for display purposes
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
|
||||
pub struct SegmentMeta {
|
||||
pub segment: Segment,
|
||||
pub timeline_id: TimelineId,
|
||||
@@ -248,6 +248,8 @@ pub(super) async fn gather_inputs(
|
||||
None
|
||||
};
|
||||
|
||||
let branch_is_invisible = timeline.is_invisible() == Some(true);
|
||||
|
||||
let lease_points = gc_info
|
||||
.leases
|
||||
.keys()
|
||||
@@ -271,7 +273,10 @@ pub(super) async fn gather_inputs(
|
||||
.map(|(lsn, _child_id, _is_offloaded)| (lsn, LsnKind::BranchPoint))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
|
||||
if !branch_is_invisible {
|
||||
// Do not count lease points for invisible branches.
|
||||
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
|
||||
}
|
||||
|
||||
drop(gc_info);
|
||||
|
||||
@@ -287,7 +292,9 @@ pub(super) async fn gather_inputs(
|
||||
|
||||
// Add a point for the PITR cutoff
|
||||
let branch_start_needed = next_pitr_cutoff <= branch_start_lsn;
|
||||
if !branch_start_needed {
|
||||
if !branch_start_needed && !branch_is_invisible {
|
||||
// Only add the GcCutOff point when the timeline is visible; otherwise, do not compute the size for the LSN
|
||||
// range from the last branch point to the latest data.
|
||||
lsns.push((next_pitr_cutoff, LsnKind::GcCutOff));
|
||||
}
|
||||
|
||||
@@ -373,11 +380,19 @@ pub(super) async fn gather_inputs(
|
||||
}
|
||||
}
|
||||
|
||||
let branch_end_lsn = if branch_is_invisible {
|
||||
// If the branch is invisible, the branch end is the last requested LSN (likely a branch cutoff point).
|
||||
segments.last().unwrap().segment.lsn
|
||||
} else {
|
||||
// Otherwise, the branch end is the last record LSN.
|
||||
last_record_lsn.0
|
||||
};
|
||||
|
||||
// Current end of the timeline
|
||||
segments.push(SegmentMeta {
|
||||
segment: Segment {
|
||||
parent: Some(parent),
|
||||
lsn: last_record_lsn.0,
|
||||
lsn: branch_end_lsn,
|
||||
size: None, // Filled in later, if necessary
|
||||
needed: true,
|
||||
},
|
||||
@@ -609,6 +624,7 @@ async fn calculate_logical_size(
|
||||
Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[test]
|
||||
fn verify_size_for_multiple_branches() {
|
||||
// this is generated from integration test test_tenant_size_with_multiple_branches, but this way
|
||||
@@ -766,6 +782,7 @@ fn verify_size_for_multiple_branches() {
|
||||
assert_eq!(inputs.calculate(), 37_851_408);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[test]
|
||||
fn verify_size_for_one_branch() {
|
||||
let doc = r#"
|
||||
|
||||
@@ -2215,6 +2215,10 @@ impl Timeline {
|
||||
self.remote_client.is_archived()
|
||||
}
|
||||
|
||||
pub(crate) fn is_invisible(&self) -> Option<bool> {
|
||||
self.remote_client.is_invisible()
|
||||
}
|
||||
|
||||
pub(crate) fn is_stopping(&self) -> bool {
|
||||
self.current_state() == TimelineState::Stopping
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user