feat(pageserver): integrate lsn lease into synthetic size (#8220)

Part of #7497, closes #8071. (accidentally closed #8208, reopened here)

## Problem

After the changes in #8084, we need synthetic size to also account for
leased LSNs so that users do not get free retention by running a small
ephemeral endpoint for a long time.

## Summary of changes

This PR integrates LSN leases into the synthetic size calculation. We
model leases as read-only branches started at the leased LSN (except it
does not have a timeline id).

Other changes:
- Add new unit tests testing whether a lease behaves like a read-only
branch.
- Change `/size_debug` response to include lease point in the SVG
visualization.
- Fix `/lsn_lease` HTTP API to do proper parsing for POST.



Signed-off-by: Yuchen Liang <yuchen@neon.tech>
Co-authored-by: Joonas Koivunen <joonas@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>
This commit is contained in:
Yuchen Liang
2024-07-04 11:09:05 -04:00
committed by Vlad Lazar
parent bd2046e1ab
commit 32828cddd6
9 changed files with 256 additions and 27 deletions

View File

@@ -265,15 +265,19 @@ paths:
type: string
format: hex
post:
description: Obtain lease for the given LSN
parameters:
- name: lsn
in: query
required: true
schema:
type: string
format: hex
description: A LSN to obtain the lease for
description: Obtains a lease for the given LSN.
requestBody:
content:
application/json:
schema:
type: object
required:
- lsn
properties:
lsn:
description: A LSN to obtain the lease for.
type: string
format: hex
responses:
"200":
description: OK

View File

@@ -22,6 +22,7 @@ use pageserver_api::models::ListAuxFilesRequest;
use pageserver_api::models::LocationConfig;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::LsnLease;
use pageserver_api::models::LsnLeaseRequest;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigResponse;
@@ -42,7 +43,7 @@ use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeTravelError;
use tenant_size_model::{SizeResult, StorageModel};
use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::JwtAuth;
@@ -1195,10 +1196,15 @@ fn synthetic_size_html_response(
timeline_map.insert(ti.timeline_id, index);
timeline_ids.push(ti.timeline_id.to_string());
}
let seg_to_branch: Vec<usize> = inputs
let seg_to_branch: Vec<(usize, SvgBranchKind)> = inputs
.segments
.iter()
.map(|seg| *timeline_map.get(&seg.timeline_id).unwrap())
.map(|seg| {
(
*timeline_map.get(&seg.timeline_id).unwrap(),
seg.kind.into(),
)
})
.collect();
let svg =
@@ -1531,15 +1537,13 @@ async fn handle_tenant_break(
// Obtains an lsn lease on the given timeline.
async fn lsn_lease_handler(
request: Request<Body>,
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let lsn: Lsn = parse_query_param(&request, "lsn")?
.ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?;
let lsn = json_request::<LsnLeaseRequest>(&mut request).await?.lsn;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);

View File

@@ -3,6 +3,7 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tenant_size_model::svg::SvgBranchKind;
use tokio::sync::oneshot::error::RecvError;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
@@ -87,6 +88,9 @@ impl SegmentMeta {
LsnKind::BranchPoint => true,
LsnKind::GcCutOff => true,
LsnKind::BranchEnd => false,
LsnKind::LeasePoint => true,
LsnKind::LeaseStart => false,
LsnKind::LeaseEnd => false,
}
}
}
@@ -103,6 +107,21 @@ pub enum LsnKind {
GcCutOff,
/// Last record LSN
BranchEnd,
/// A LSN lease is granted here.
LeasePoint,
/// A lease starts from here.
LeaseStart,
/// Last record LSN for the lease (should have the same LSN as the previous [`LsnKind::LeaseStart`]).
LeaseEnd,
}
impl From<LsnKind> for SvgBranchKind {
fn from(kind: LsnKind) -> Self {
match kind {
LsnKind::LeasePoint | LsnKind::LeaseStart | LsnKind::LeaseEnd => SvgBranchKind::Lease,
_ => SvgBranchKind::Timeline,
}
}
}
/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as
@@ -124,6 +143,9 @@ pub struct TimelineInputs {
/// Cutoff point calculated from the user-supplied 'max_retention_period'
retention_param_cutoff: Option<Lsn>,
/// Lease points on the timeline
lease_points: Vec<Lsn>,
}
/// Gathers the inputs for the tenant sizing model.
@@ -234,6 +256,13 @@ pub(super) async fn gather_inputs(
None
};
let lease_points = gc_info
.leases
.keys()
.filter(|&&lsn| lsn > ancestor_lsn)
.copied()
.collect::<Vec<_>>();
// next_gc_cutoff in parent branch are not of interest (right now at least), nor do we
// want to query any logical size before initdb_lsn.
let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);
@@ -248,6 +277,8 @@ pub(super) async fn gather_inputs(
.map(|lsn| (lsn, LsnKind::BranchPoint))
.collect::<Vec<_>>();
lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
drop(gc_info);
// Add branch points we collected earlier, just in case there were any that were
@@ -296,6 +327,7 @@ pub(super) async fn gather_inputs(
if kind == LsnKind::BranchPoint {
branchpoint_segments.insert((timeline_id, lsn), segments.len());
}
segments.push(SegmentMeta {
segment: Segment {
parent: Some(parent),
@@ -306,7 +338,45 @@ pub(super) async fn gather_inputs(
timeline_id: timeline.timeline_id,
kind,
});
parent += 1;
parent = segments.len() - 1;
if kind == LsnKind::LeasePoint {
// Needs `LeaseStart` and `LeaseEnd` as well to model lease as a read-only branch that never writes data
// (i.e. it's lsn has not advanced from ancestor_lsn), and therefore the three segments have the same LSN
// value. Without the other two segments, the calculation code would not count the leased LSN as a point
// to be retained.
// Did not use `BranchStart` or `BranchEnd` so we can differentiate branches and leases during debug.
//
// Alt Design: rewrite the entire calculation code to be independent of timeline id. Both leases and
// branch points can be given a synthetic id so we can unite them.
let mut lease_parent = parent;
// Start of a lease.
segments.push(SegmentMeta {
segment: Segment {
parent: Some(lease_parent),
lsn: lsn.0,
size: None, // Filled in later, if necessary
needed: lsn > next_gc_cutoff, // only needed if the point is within rentention.
},
timeline_id: timeline.timeline_id,
kind: LsnKind::LeaseStart,
});
lease_parent += 1;
// End of the lease.
segments.push(SegmentMeta {
segment: Segment {
parent: Some(lease_parent),
lsn: lsn.0,
size: None, // Filled in later, if necessary
needed: true, // everything at the lease LSN must be readable => is needed
},
timeline_id: timeline.timeline_id,
kind: LsnKind::LeaseEnd,
});
}
}
// Current end of the timeline
@@ -332,6 +402,7 @@ pub(super) async fn gather_inputs(
pitr_cutoff,
next_gc_cutoff,
retention_param_cutoff,
lease_points,
});
}
@@ -674,7 +745,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/2210CD0",
"pitr_cutoff": "0/2210CD0",
"next_gc_cutoff": "0/2210CD0",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
},
{
"timeline_id": "454626700469f0a9914949b9d018e876",
@@ -684,7 +756,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/1817770",
"pitr_cutoff": "0/1817770",
"next_gc_cutoff": "0/1817770",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
},
{
"timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
@@ -694,7 +767,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/18B3D98",
"pitr_cutoff": "0/18B3D98",
"next_gc_cutoff": "0/18B3D98",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
}
]
}
@@ -749,7 +823,8 @@ fn verify_size_for_one_branch() {
"horizon_cutoff": "47/240A5860",
"pitr_cutoff": "47/240A5860",
"next_gc_cutoff": "47/240A5860",
"retention_param_cutoff": "0/0"
"retention_param_cutoff": "0/0",
"lease_points": []
}
]
}"#;

View File

@@ -14,6 +14,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use arc_swap::ArcSwap;
use bytes::Bytes;
use camino::Utf8Path;
use chrono::{DateTime, Utc};
use enumset::EnumSet;
use fail::fail_point;
use once_cell::sync::Lazy;
@@ -1590,7 +1591,13 @@ impl Timeline {
let existing_lease = occupied.get_mut();
if valid_until > existing_lease.valid_until {
existing_lease.valid_until = valid_until;
let dt: DateTime<Utc> = valid_until.into();
info!("lease extended to {}", dt);
} else {
let dt: DateTime<Utc> = existing_lease.valid_until.into();
info!("existing lease covers greater length, valid until {}", dt);
}
existing_lease.clone()
} else {
// Reject already GC-ed LSN (lsn < latest_gc_cutoff)
@@ -1599,6 +1606,8 @@ impl Timeline {
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
}
let dt: DateTime<Utc> = valid_until.into();
info!("lease created, valid until {}", dt);
entry.or_insert(LsnLease { valid_until }).clone()
}
};