Compare commits

...

14 Commits

Author SHA1 Message Date
Yuchen Liang
db378d1a5f Merge branch 'main' into yuchen/lsn-leases-poc 2024-06-17 14:01:39 -04:00
Yuchen Liang
0e897e8bf2 expose duration as a param for make_lsn_lease; catch invalid (gc-ed) lsn lease request
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-17 14:01:08 -04:00
Yuchen Liang
b0b4ea609e fix comparison to take max lsn with valid lease; fix tests
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-17 13:43:04 -04:00
Yuchen Liang
be824220bb Merge branch 'main' into yuchen/lsn-leases-poc 2024-06-14 15:10:23 -04:00
Yuchen Liang
723ea86f40 make lease more robust
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-14 15:03:22 -04:00
Yuchen Liang
da55eebc83 use BTreeMap::retain
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-13 12:59:05 -04:00
Yuchen Liang
daffd5b998 update default lsn lease length
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-13 12:51:51 -04:00
Yuchen Liang
9a7d6a7526 Merge branch 'main' into yuchen/lsn-leases-poc 2024-06-13 09:03:21 -04:00
Yuchen Liang
9b25a4e5d2 Merge branch 'main' into yuchen/lsn-leases-poc 2024-06-13 08:25:03 -04:00
Yuchen Liang
be8f200815 setup demo unit test
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-12 12:27:06 -04:00
Yuchen Liang
a5c8f94165 release mutex guard early in test
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-12 08:56:40 -04:00
Yuchen Liang
f0a0515d4f fix lsn comparison logic
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-12 08:56:40 -04:00
Yuchen Liang
a7ca1d60b4 fix clippy
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-12 08:56:40 -04:00
Yuchen Liang
e834839255 DNM/Poc: use lsn leases to temporarily block gc
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-06-12 08:56:40 -04:00
7 changed files with 162 additions and 19 deletions

View File

@@ -177,6 +177,15 @@ serde_with::serde_conv!(
|value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }
);
impl LsnLease {
/// The default length for an explicit LSN lease is 10 minutes.
pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);
/// Checks whether the lease is expired.
pub fn is_expired(&self, now: &SystemTime) -> bool {
now > &self.valid_until
}
}
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ActivatingFrom {
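For orientation (not part of the diff): a minimal sketch of how the new constant and expiry helper are meant to be used, assuming LsnLease remains directly constructible with its public valid_until field, as the handlers below rely on.

use pageserver_api::models::LsnLease;
use std::time::{Duration, SystemTime};

fn main() {
    // Grant a lease that is valid for the default length (10 minutes from now).
    let lease = LsnLease {
        valid_until: SystemTime::now() + LsnLease::DEFAULT_LENGTH,
    };

    // Expiry is checked against a caller-supplied timestamp, so a GC pass can evaluate
    // many leases against one consistent "now" (see the retain() call added further down).
    let now = SystemTime::now();
    assert!(!lease.is_expired(&now));
    assert!(lease.is_expired(&(now + Duration::from_secs(11 * 60))));
}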

View File

@@ -21,6 +21,7 @@ use pageserver_api::models::IngestAuxFilesRequest;
use pageserver_api::models::ListAuxFilesRequest;
use pageserver_api::models::LocationConfig;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::LsnLease;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigResponse;
@@ -1730,7 +1731,7 @@ async fn lsn_lease_handler(
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
.await?;
let result = timeline
.make_lsn_lease(lsn, &ctx)
.make_lsn_lease(lsn, LsnLease::DEFAULT_LENGTH, &ctx)
.map_err(|e| ApiError::InternalServerError(e.context("lsn lease http handler")))?;
json_response(StatusCode::OK, result)

View File

@@ -9,6 +9,7 @@ use futures::stream::FuturesUnordered;
use futures::Stream;
use futures::StreamExt;
use pageserver_api::key::Key;
use pageserver_api::models::LsnLease;
use pageserver_api::models::TenantState;
use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
@@ -935,7 +936,7 @@ impl PageServerHandler {
let timeline = self
.get_active_tenant_timeline(tenant_shard_id.tenant_id, timeline_id, shard_selector)
.await?;
let lease = timeline.make_lsn_lease(lsn, ctx)?;
let lease = timeline.make_lsn_lease(lsn, LsnLease::DEFAULT_LENGTH, ctx)?;
let valid_until = lease
.valid_until
.duration_since(SystemTime::UNIX_EPOCH)
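The hunk ends before the reply is assembled, so the wire encoding is not visible here; the sketch below only illustrates the epoch conversion the handler starts, with milliseconds assumed as the transmitted unit and all variable names illustrative.

use std::time::{Duration, SystemTime};

fn main() {
    // Pageserver side: turn the lease's SystemTime expiry into a duration since the Unix
    // epoch so it can travel as a plain integer.
    let valid_until = SystemTime::now() + Duration::from_secs(10 * 60);
    let since_epoch = valid_until
        .duration_since(SystemTime::UNIX_EPOCH)
        .expect("lease expiry is after the epoch");
    let millis = since_epoch.as_millis() as u64;

    // Client side: rebuild the expiry; sub-millisecond precision is lost to truncation.
    let reconstructed = SystemTime::UNIX_EPOCH + Duration::from_millis(millis);
    assert!(reconstructed <= valid_until);
}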

View File

@@ -240,6 +240,7 @@ pub struct GcResult {
pub layers_needed_by_cutoff: u64,
pub layers_needed_by_pitr: u64,
pub layers_needed_by_branches: u64,
pub layers_needed_by_leases: u64,
pub layers_not_updated: u64,
pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.
@@ -269,6 +270,7 @@ impl AddAssign for GcResult {
self.layers_needed_by_pitr += other.layers_needed_by_pitr;
self.layers_needed_by_cutoff += other.layers_needed_by_cutoff;
self.layers_needed_by_branches += other.layers_needed_by_branches;
self.layers_needed_by_leases += other.layers_needed_by_leases;
self.layers_not_updated += other.layers_not_updated;
self.layers_removed += other.layers_removed;
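A self-contained stand-in (two counters instead of GcResult's full set) showing why the second hunk matters: per-timeline results are folded into a tenant-level total with +=, so the new layers_needed_by_leases counter has to be added to the AddAssign impl as well as to the struct, or it would silently read as zero in the total.

use std::ops::AddAssign;

#[derive(Default, Clone, Copy)]
struct MiniGcResult {
    layers_needed_by_leases: u64,
    layers_removed: u64,
}

impl AddAssign for MiniGcResult {
    fn add_assign(&mut self, other: Self) {
        self.layers_needed_by_leases += other.layers_needed_by_leases;
        self.layers_removed += other.layers_removed;
    }
}

fn main() {
    let per_timeline = [
        MiniGcResult { layers_needed_by_leases: 7, layers_removed: 1 },
        MiniGcResult { layers_needed_by_leases: 0, layers_removed: 3 },
    ];
    let mut total = MiniGcResult::default();
    for r in per_timeline {
        total += r;
    }
    assert_eq!(total.layers_needed_by_leases, 7);
    assert_eq!(total.layers_removed, 4);
}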

View File

@@ -31,6 +31,7 @@ use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeoutOrCancel;
use std::fmt;
use std::time::SystemTime;
use storage_broker::BrokerClientChannel;
use tokio::io::BufReader;
use tokio::sync::watch;
@@ -65,9 +66,9 @@ use self::timeline::uninit::TimelineCreateGuard;
use self::timeline::uninit::TimelineExclusionError;
use self::timeline::uninit::UninitializedTimeline;
use self::timeline::EvictionTaskTenantState;
use self::timeline::GcCutoffs;
use self::timeline::TimelineResources;
use self::timeline::WaitLsnError;
use self::timeline::{GcCutoffs, GcInfo};
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueueClient;
@@ -3010,12 +3011,13 @@ impl Tenant {
{
let mut target = timeline.gc_info.write().unwrap();
let now = SystemTime::now();
target.leases.retain(|_, lease| !lease.is_expired(&now));
match gc_cutoffs.remove(&timeline.timeline_id) {
Some(cutoffs) => {
*target = GcInfo {
retain_lsns: branchpoints,
cutoffs,
};
target.retain_lsns = branchpoints;
target.cutoffs = cutoffs;
}
None => {
// reasons for this being unavailable:
@@ -4048,6 +4050,7 @@ mod tests {
use bytes::{Bytes, BytesMut};
use hex_literal::hex;
use itertools::Itertools;
use models::LsnLease;
use pageserver_api::key::{AUX_FILES_KEY, AUX_KEY_PREFIX, NON_INHERITED_RANGE};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
@@ -4261,6 +4264,66 @@ mod tests {
tline.freeze_and_flush().await.map_err(|e| e.into())
}
#[tokio::test]
async fn test_lsn_leases() -> anyhow::Result<()> {
let (tenant, ctx) = TenantHarness::create("test_lsn_leases")?.load().await;
let key = Key::from_hex("010000000033333333444444445500000000").unwrap();
let end_lsn = Lsn(0x100);
let image_layers = (0x20..=0x90)
.step_by(0x10)
.map(|n| {
(
Lsn(n),
vec![(key, test_img(&format!("data key at {:x}", n)))],
)
})
.collect();
let timeline = tenant
.create_test_timeline_with_layers(
TIMELINE_ID,
Lsn(0x10),
DEFAULT_PG_VERSION,
&ctx,
Vec::new(),
image_layers,
end_lsn,
)
.await?;
let leased_lsns = [0x30, 0x50, 0x70];
let _: anyhow::Result<_> = leased_lsns.iter().try_for_each(|n| {
let _ = timeline.make_lsn_lease(Lsn(*n), LsnLease::DEFAULT_LENGTH, &ctx)?;
Ok(())
});
// Force set disk consistent lsn so we can get the cutoff at `end_lsn`.
timeline.force_set_disk_consistent_lsn(end_lsn);
let res = tenant
.gc_iteration(
Some(TIMELINE_ID),
0,
Duration::ZERO,
&CancellationToken::new(),
&ctx,
)
.await?;
// Keeping everything <= Lsn(0x70) because of the leases:
// 0/10: initdb layer
// (0/20..=0/70).step_by(0x10): image layers added when creating the timeline.
assert_eq!(res.layers_needed_by_leases, 7);
// Keeping 0/90 b/c it is the latest layer.
assert_eq!(res.layers_not_updated, 1);
// Removed 0/80.
assert_eq!(res.layers_removed, 1);
Ok(())
}
#[tokio::test]
async fn test_prohibit_branch_creation_on_garbage_collected_data() -> anyhow::Result<()> {
let (tenant, ctx) =
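For reference, a standalone sketch of the two BTreeMap operations this file now leans on: retain drops expired leases in place (the one-liner added above), and last_key_value then yields the greatest still-leased LSN, which gc_iteration later passes down as max_lsn_with_valid_lease. Lsn is stood in for by u64 and a lease by its expiry time.

use std::collections::BTreeMap;
use std::time::{Duration, SystemTime};

fn main() {
    let now = SystemTime::now();
    let mut leases: BTreeMap<u64, SystemTime> = BTreeMap::new();
    leases.insert(0x30, now + Duration::from_secs(600)); // still valid
    leases.insert(0x50, now - Duration::from_secs(1));   // already expired
    leases.insert(0x70, now + Duration::from_secs(600)); // still valid

    // Prune expired leases (mirroring !is_expired), then take the greatest remaining key.
    leases.retain(|_, valid_until| *valid_until >= now);
    let max_leased = leases.last_key_value().map(|(lsn, _)| *lsn);
    assert_eq!(max_leased, Some(0x70));
}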

View File

@@ -14,6 +14,7 @@ use crate::tenant::config::defaults::DEFAULT_COMPACTION_PERIOD;
use crate::tenant::throttle::Stats;
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use pageserver_api::models::LsnLease;
use rand::Rng;
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -346,6 +347,10 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
// cutoff specified as time.
let ctx =
RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
// Delay GC by default lease period at pageserver restart.
tokio::time::sleep(LsnLease::DEFAULT_LENGTH).await;
let mut first = true;
loop {
tokio::select! {
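The sleep above delays the first GC pass by one full default lease length after a restart. A plausible reading, not spelled out in the diff, is that leases live only in pageserver memory, so by the time GC first runs, every lease granted before the restart has either been re-requested by its client or would have expired anyway. On the client side that implies a renewal loop along these lines, where request_lsn_lease is a hypothetical stand-in for whatever call reaches Timeline::make_lsn_lease:

use std::time::Duration;
use tokio_util::sync::CancellationToken;

// Hypothetical stand-in for the client's call that ends up in Timeline::make_lsn_lease.
async fn request_lsn_lease(_lsn: u64) -> Result<(), String> {
    Ok(())
}

// Renew well inside the lease length (here at half of the 10-minute default) so a transient
// failure still leaves time for a retry before the lease expires on the pageserver.
async fn keep_lease_alive(lsn: u64, cancel: CancellationToken) {
    let renew_every = Duration::from_secs(5 * 60);
    loop {
        if let Err(e) = request_lsn_lease(lsn).await {
            eprintln!("lease renewal for {lsn:#x} failed: {e}");
        }
        tokio::select! {
            _ = tokio::time::sleep(renew_every) => {}
            _ = cancel.cancelled() => return,
        }
    }
}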

View File

@@ -454,6 +454,12 @@ pub(crate) struct GcInfo {
/// The cutoff coordinates, which are combined by selecting the minimum.
pub(crate) cutoffs: GcCutoffs,
// TODO(yuchen): If we decide to incorporate `retain_lsns` into the same structure,
// this would look something like `BTreeMap<Lsn, Option<LsnLease>>`, where
// `None` suggests an infinite lease (will be used by current retain_lsns).
/// Leases granted to particular LSNs.
pub(crate) leases: BTreeMap<Lsn, LsnLease>,
}
impl GcInfo {
@@ -1558,14 +1564,34 @@ impl Timeline {
/// Obtains a temporary lease blocking garbage collection for the given LSN
pub(crate) fn make_lsn_lease(
&self,
_lsn: Lsn,
lsn: Lsn,
length: Duration,
_ctx: &RequestContext,
) -> anyhow::Result<LsnLease> {
const LEASE_LENGTH: Duration = Duration::from_secs(5 * 60);
let lease = LsnLease {
valid_until: SystemTime::now() + LEASE_LENGTH,
let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
if lsn < *latest_gc_cutoff_lsn {
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
}
let mut lease = LsnLease {
valid_until: SystemTime::now() + length,
};
// TODO: dummy implementation
{
let mut gc_info = self.gc_info.write().unwrap();
gc_info
.leases
.entry(lsn)
.and_modify(|existing| {
// Only replace the existing lease if the new one is valid for longer; otherwise return the existing lease to the caller.
if lease.valid_until > existing.valid_until {
*existing = lease.clone();
} else {
lease = existing.clone();
}
})
.or_insert(lease.clone());
}
Ok(lease)
}
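The entry-based update above makes renewal idempotent and extend-only: re-requesting a lease can only push valid_until forward, and the caller always gets back the lease actually in effect. A minimal stand-in (u64 for Lsn, the expiry SystemTime for the lease body) exercising the same and_modify/or_insert pattern:

use std::collections::BTreeMap;
use std::time::{Duration, SystemTime};

// Mirror of make_lsn_lease's update rule: keep whichever expiry is later and report the
// lease that is actually in effect afterwards.
fn upsert(leases: &mut BTreeMap<u64, SystemTime>, lsn: u64, mut valid_until: SystemTime) -> SystemTime {
    leases
        .entry(lsn)
        .and_modify(|existing| {
            if valid_until > *existing {
                *existing = valid_until;
            } else {
                valid_until = *existing;
            }
        })
        .or_insert(valid_until);
    valid_until
}

fn main() {
    let mut leases = BTreeMap::new();
    let now = SystemTime::now();

    let granted = upsert(&mut leases, 0x30, now + Duration::from_secs(600));
    // A later, shorter request does not shrink the lease; the caller gets the longer expiry back.
    let renewed = upsert(&mut leases, 0x30, now + Duration::from_secs(60));
    assert_eq!(granted, renewed);
}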
@@ -4907,13 +4933,23 @@ impl Timeline {
return Err(GcError::TimelineCancelled);
}
let (horizon_cutoff, pitr_cutoff, retain_lsns) = {
let (horizon_cutoff, pitr_cutoff, retain_lsns, max_lsn_with_valid_lease) = {
let gc_info = self.gc_info.read().unwrap();
let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
let pitr_cutoff = gc_info.cutoffs.pitr;
let retain_lsns = gc_info.retain_lsns.clone();
(horizon_cutoff, pitr_cutoff, retain_lsns)
// Gets the maximum LSN that still holds a valid lease.
// Caveat: this value could be stale, since we rely on refresh_gc_info to prune expired leases,
// so a lease may have expired between that refresh and this read.
let max_lsn_with_valid_lease = gc_info.leases.last_key_value().map(|(lsn, _)| *lsn);
(
horizon_cutoff,
pitr_cutoff,
retain_lsns,
max_lsn_with_valid_lease,
)
};
let mut new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);
@@ -4944,7 +4980,13 @@ impl Timeline {
.set(Lsn::INVALID.0 as i64);
let res = self
.gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
.gc_timeline(
horizon_cutoff,
pitr_cutoff,
retain_lsns,
new_gc_cutoff,
max_lsn_with_valid_lease,
)
.instrument(
info_span!("gc_timeline", timeline_id = %self.timeline_id, cutoff = %new_gc_cutoff),
)
@@ -4962,6 +5004,7 @@ impl Timeline {
pitr_cutoff: Lsn,
retain_lsns: Vec<Lsn>,
new_gc_cutoff: Lsn,
max_lsn_with_valid_lease: Option<Lsn>,
) -> Result<GcResult, GcError> {
// FIXME: if there is an ongoing detach_from_ancestor, we should just skip gc
@@ -5009,7 +5052,8 @@ impl Timeline {
// 1. it is older than cutoff LSN;
// 2. it is older than PITR interval;
// 3. it doesn't need to be retained for 'retain_lsns';
// 4. newer on-disk image layers cover the layer's whole key range
// 4. it does not need to be kept for LSNs holding valid leases (logic is very similar to retain_lsns);
// 5. newer on-disk image layers cover the layer's whole key range
//
// TODO holding a write lock is too aggressive and avoidable
let mut guard = self.layers.write().await;
@@ -5060,7 +5104,20 @@ impl Timeline {
}
}
// 4. Is there a later on-disk layer for this relation?
// 4. Is there a valid lease that requires us to keep this layer?
if let Some(lsn) = &max_lsn_with_valid_lease {
if &l.get_lsn_range().start <= lsn {
debug!(
"keeping {} because there is a valid lease preventing GC at {}",
l.layer_name(),
lsn,
);
result.layers_needed_by_leases += 1;
continue 'outer;
}
}
// 5. Is there a later on-disk layer for this relation?
//
// The end-LSN is exclusive, while disk_consistent_lsn is
// inclusive. For example, if disk_consistent_lsn is 100, it is
@@ -5082,13 +5139,13 @@ impl Timeline {
if !layers
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
{
debug!("keeping {} because it is the latest layer", l.layer_name());
info!("keeping {} because it is the latest layer", l.layer_name());
result.layers_not_updated += 1;
continue 'outer;
}
// We didn't find any reason to keep this file, so remove it.
debug!(
info!(
"garbage collecting {} is_dropped: xx is_incremental: {}",
l.layer_name(),
l.is_incremental(),
@@ -5438,6 +5495,11 @@ impl Timeline {
self.last_record_lsn.advance(new_lsn);
}
#[cfg(test)]
pub(super) fn force_set_disk_consistent_lsn(&self, new_value: Lsn) {
self.disk_consistent_lsn.store(new_value);
}
/// Force create an image layer and place it into the layer map.
///
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]