mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-15 04:00:38 +00:00
Compare commits
14 Commits
release-pr
...
yuchen/lsn
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
db378d1a5f | ||
|
|
0e897e8bf2 | ||
|
|
b0b4ea609e | ||
|
|
be824220bb | ||
|
|
723ea86f40 | ||
|
|
da55eebc83 | ||
|
|
daffd5b998 | ||
|
|
9a7d6a7526 | ||
|
|
9b25a4e5d2 | ||
|
|
be8f200815 | ||
|
|
a5c8f94165 | ||
|
|
f0a0515d4f | ||
|
|
a7ca1d60b4 | ||
|
|
e834839255 |
@@ -177,6 +177,15 @@ serde_with::serde_conv!(
|
||||
|value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }
|
||||
);
|
||||
|
||||
impl LsnLease {
|
||||
pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);
|
||||
|
||||
/// Checks whether the lease is expired.
|
||||
pub fn is_expired(&self, now: &SystemTime) -> bool {
|
||||
now > &self.valid_until
|
||||
}
|
||||
}
|
||||
|
||||
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum ActivatingFrom {
|
||||
|
||||
@@ -21,6 +21,7 @@ use pageserver_api::models::IngestAuxFilesRequest;
|
||||
use pageserver_api::models::ListAuxFilesRequest;
|
||||
use pageserver_api::models::LocationConfig;
|
||||
use pageserver_api::models::LocationConfigListResponse;
|
||||
use pageserver_api::models::LsnLease;
|
||||
use pageserver_api::models::ShardParameters;
|
||||
use pageserver_api::models::TenantDetails;
|
||||
use pageserver_api::models::TenantLocationConfigResponse;
|
||||
@@ -1730,7 +1731,7 @@ async fn lsn_lease_handler(
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
let result = timeline
|
||||
.make_lsn_lease(lsn, &ctx)
|
||||
.make_lsn_lease(lsn, LsnLease::DEFAULT_LENGTH, &ctx)
|
||||
.map_err(|e| ApiError::InternalServerError(e.context("lsn lease http handler")))?;
|
||||
|
||||
json_response(StatusCode::OK, result)
|
||||
|
||||
@@ -9,6 +9,7 @@ use futures::stream::FuturesUnordered;
|
||||
use futures::Stream;
|
||||
use futures::StreamExt;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::LsnLease;
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
@@ -935,7 +936,7 @@ impl PageServerHandler {
|
||||
let timeline = self
|
||||
.get_active_tenant_timeline(tenant_shard_id.tenant_id, timeline_id, shard_selector)
|
||||
.await?;
|
||||
let lease = timeline.make_lsn_lease(lsn, ctx)?;
|
||||
let lease = timeline.make_lsn_lease(lsn, LsnLease::DEFAULT_LENGTH, ctx)?;
|
||||
let valid_until = lease
|
||||
.valid_until
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
|
||||
@@ -240,6 +240,7 @@ pub struct GcResult {
|
||||
pub layers_needed_by_cutoff: u64,
|
||||
pub layers_needed_by_pitr: u64,
|
||||
pub layers_needed_by_branches: u64,
|
||||
pub layers_needed_by_leases: u64,
|
||||
pub layers_not_updated: u64,
|
||||
pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.
|
||||
|
||||
@@ -269,6 +270,7 @@ impl AddAssign for GcResult {
|
||||
self.layers_needed_by_pitr += other.layers_needed_by_pitr;
|
||||
self.layers_needed_by_cutoff += other.layers_needed_by_cutoff;
|
||||
self.layers_needed_by_branches += other.layers_needed_by_branches;
|
||||
self.layers_needed_by_leases += other.layers_needed_by_leases;
|
||||
self.layers_not_updated += other.layers_not_updated;
|
||||
self.layers_removed += other.layers_removed;
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ use remote_storage::DownloadError;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use remote_storage::TimeoutOrCancel;
|
||||
use std::fmt;
|
||||
use std::time::SystemTime;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::sync::watch;
|
||||
@@ -65,9 +66,9 @@ use self::timeline::uninit::TimelineCreateGuard;
|
||||
use self::timeline::uninit::TimelineExclusionError;
|
||||
use self::timeline::uninit::UninitializedTimeline;
|
||||
use self::timeline::EvictionTaskTenantState;
|
||||
use self::timeline::GcCutoffs;
|
||||
use self::timeline::TimelineResources;
|
||||
use self::timeline::WaitLsnError;
|
||||
use self::timeline::{GcCutoffs, GcInfo};
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::deletion_queue::DeletionQueueClient;
|
||||
@@ -3010,12 +3011,13 @@ impl Tenant {
|
||||
{
|
||||
let mut target = timeline.gc_info.write().unwrap();
|
||||
|
||||
let now = SystemTime::now();
|
||||
target.leases.retain(|_, lease| !lease.is_expired(&now));
|
||||
|
||||
match gc_cutoffs.remove(&timeline.timeline_id) {
|
||||
Some(cutoffs) => {
|
||||
*target = GcInfo {
|
||||
retain_lsns: branchpoints,
|
||||
cutoffs,
|
||||
};
|
||||
target.retain_lsns = branchpoints;
|
||||
target.cutoffs = cutoffs;
|
||||
}
|
||||
None => {
|
||||
// reasons for this being unavailable:
|
||||
@@ -4048,6 +4050,7 @@ mod tests {
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use hex_literal::hex;
|
||||
use itertools::Itertools;
|
||||
use models::LsnLease;
|
||||
use pageserver_api::key::{AUX_FILES_KEY, AUX_KEY_PREFIX, NON_INHERITED_RANGE};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
|
||||
@@ -4261,6 +4264,66 @@ mod tests {
|
||||
tline.freeze_and_flush().await.map_err(|e| e.into())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lsn_leases() -> anyhow::Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_lsn_leases")?.load().await;
|
||||
let key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
|
||||
let end_lsn = Lsn(0x100);
|
||||
let image_layers = (0x20..=0x90)
|
||||
.step_by(0x10)
|
||||
.map(|n| {
|
||||
(
|
||||
Lsn(n),
|
||||
vec![(key, test_img(&format!("data key at {:x}", n)))],
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let timeline = tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x10),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
Vec::new(),
|
||||
image_layers,
|
||||
end_lsn,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let leased_lsns = [0x30, 0x50, 0x70];
|
||||
let _: anyhow::Result<_> = leased_lsns.iter().try_for_each(|n| {
|
||||
let _ = timeline.make_lsn_lease(Lsn(*n), LsnLease::DEFAULT_LENGTH, &ctx)?;
|
||||
Ok(())
|
||||
});
|
||||
|
||||
// Force set disk consistent lsn so we can get the cutoff at `end_lsn`.
|
||||
|
||||
timeline.force_set_disk_consistent_lsn(end_lsn);
|
||||
|
||||
let res = tenant
|
||||
.gc_iteration(
|
||||
Some(TIMELINE_ID),
|
||||
0,
|
||||
Duration::ZERO,
|
||||
&CancellationToken::new(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Keeping everything <= Lsn(0x80) b/c leases:
|
||||
// 0/10: initdb layer
|
||||
// (0/20..=0/70).step_by(0x10): image layers added when creating the timeline.
|
||||
assert_eq!(res.layers_needed_by_leases, 7);
|
||||
// Keeping 0/90 b/c it is the latest layer.
|
||||
assert_eq!(res.layers_not_updated, 1);
|
||||
// Removed 0/80.
|
||||
assert_eq!(res.layers_removed, 1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prohibit_branch_creation_on_garbage_collected_data() -> anyhow::Result<()> {
|
||||
let (tenant, ctx) =
|
||||
|
||||
@@ -14,6 +14,7 @@ use crate::tenant::config::defaults::DEFAULT_COMPACTION_PERIOD;
|
||||
use crate::tenant::throttle::Stats;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use pageserver_api::models::LsnLease;
|
||||
use rand::Rng;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
@@ -346,6 +347,10 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
// cutoff specified as time.
|
||||
let ctx =
|
||||
RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
|
||||
// Delay GC by default lease period at pageserver restart.
|
||||
tokio::time::sleep(LsnLease::DEFAULT_LENGTH).await;
|
||||
|
||||
let mut first = true;
|
||||
loop {
|
||||
tokio::select! {
|
||||
|
||||
@@ -454,6 +454,12 @@ pub(crate) struct GcInfo {
|
||||
|
||||
/// The cutoff coordinates, which are combined by selecting the minimum.
|
||||
pub(crate) cutoffs: GcCutoffs,
|
||||
|
||||
// TODO(yuchen): If we decide to incorporate `retain_lsns` into the same structure,
|
||||
// this would look something like `BTreeMap<Lsn, Option<LsnLease>>`, where
|
||||
// `None` suggests an infinite lease (will be used by current retain_lsns).
|
||||
/// Leases granted to particular LSNs.
|
||||
pub(crate) leases: BTreeMap<Lsn, LsnLease>,
|
||||
}
|
||||
|
||||
impl GcInfo {
|
||||
@@ -1558,14 +1564,34 @@ impl Timeline {
|
||||
/// Obtains a temporary lease blocking garbage collection for the given LSN
|
||||
pub(crate) fn make_lsn_lease(
|
||||
&self,
|
||||
_lsn: Lsn,
|
||||
lsn: Lsn,
|
||||
length: Duration,
|
||||
_ctx: &RequestContext,
|
||||
) -> anyhow::Result<LsnLease> {
|
||||
const LEASE_LENGTH: Duration = Duration::from_secs(5 * 60);
|
||||
let lease = LsnLease {
|
||||
valid_until: SystemTime::now() + LEASE_LENGTH,
|
||||
let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
|
||||
if lsn < *latest_gc_cutoff_lsn {
|
||||
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
|
||||
}
|
||||
|
||||
let mut lease = LsnLease {
|
||||
valid_until: SystemTime::now() + length,
|
||||
};
|
||||
// TODO: dummy implementation
|
||||
|
||||
{
|
||||
let mut gc_info = self.gc_info.write().unwrap();
|
||||
gc_info
|
||||
.leases
|
||||
.entry(lsn)
|
||||
.and_modify(|existing| {
|
||||
// Insert a lease only if it extends longer than the existing one.
|
||||
if lease.valid_until > existing.valid_until {
|
||||
*existing = lease.clone();
|
||||
} else {
|
||||
lease = existing.clone();
|
||||
}
|
||||
})
|
||||
.or_insert(lease.clone());
|
||||
}
|
||||
Ok(lease)
|
||||
}
|
||||
|
||||
@@ -4907,13 +4933,23 @@ impl Timeline {
|
||||
return Err(GcError::TimelineCancelled);
|
||||
}
|
||||
|
||||
let (horizon_cutoff, pitr_cutoff, retain_lsns) = {
|
||||
let (horizon_cutoff, pitr_cutoff, retain_lsns, max_lsn_with_valid_lease) = {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
|
||||
let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
|
||||
let pitr_cutoff = gc_info.cutoffs.pitr;
|
||||
let retain_lsns = gc_info.retain_lsns.clone();
|
||||
(horizon_cutoff, pitr_cutoff, retain_lsns)
|
||||
|
||||
// Gets the minimum LSN that holds the valid lease.
|
||||
// Caveat: This value could be stale since we rely on refresh_gc_info to invalidate leases,
|
||||
// so there could be leases invalidated between the refresh and here.
|
||||
let max_lsn_with_valid_lease = gc_info.leases.last_key_value().map(|(lsn, _)| *lsn);
|
||||
(
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
retain_lsns,
|
||||
max_lsn_with_valid_lease,
|
||||
)
|
||||
};
|
||||
|
||||
let mut new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);
|
||||
@@ -4944,7 +4980,13 @@ impl Timeline {
|
||||
.set(Lsn::INVALID.0 as i64);
|
||||
|
||||
let res = self
|
||||
.gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
|
||||
.gc_timeline(
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
retain_lsns,
|
||||
new_gc_cutoff,
|
||||
max_lsn_with_valid_lease,
|
||||
)
|
||||
.instrument(
|
||||
info_span!("gc_timeline", timeline_id = %self.timeline_id, cutoff = %new_gc_cutoff),
|
||||
)
|
||||
@@ -4962,6 +5004,7 @@ impl Timeline {
|
||||
pitr_cutoff: Lsn,
|
||||
retain_lsns: Vec<Lsn>,
|
||||
new_gc_cutoff: Lsn,
|
||||
max_lsn_with_valid_lease: Option<Lsn>,
|
||||
) -> Result<GcResult, GcError> {
|
||||
// FIXME: if there is an ongoing detach_from_ancestor, we should just skip gc
|
||||
|
||||
@@ -5009,7 +5052,8 @@ impl Timeline {
|
||||
// 1. it is older than cutoff LSN;
|
||||
// 2. it is older than PITR interval;
|
||||
// 3. it doesn't need to be retained for 'retain_lsns';
|
||||
// 4. newer on-disk image layers cover the layer's whole key range
|
||||
// 4. it does not need to be kept for LSNs holding valid leases (logic is very similar to retain_lsns);
|
||||
// 5. newer on-disk image layers cover the layer's whole key range
|
||||
//
|
||||
// TODO holding a write lock is too agressive and avoidable
|
||||
let mut guard = self.layers.write().await;
|
||||
@@ -5060,7 +5104,20 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Is there a later on-disk layer for this relation?
|
||||
// 4. Is there a valid lease that requires us to keep this layer?
|
||||
if let Some(lsn) = &max_lsn_with_valid_lease {
|
||||
if &l.get_lsn_range().start <= lsn {
|
||||
debug!(
|
||||
"keeping {} because there is a valid lease preventing GC at {}",
|
||||
l.layer_name(),
|
||||
lsn,
|
||||
);
|
||||
result.layers_needed_by_leases += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Is there a later on-disk layer for this relation?
|
||||
//
|
||||
// The end-LSN is exclusive, while disk_consistent_lsn is
|
||||
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
||||
@@ -5082,13 +5139,13 @@ impl Timeline {
|
||||
if !layers
|
||||
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
|
||||
{
|
||||
debug!("keeping {} because it is the latest layer", l.layer_name());
|
||||
info!("keeping {} because it is the latest layer", l.layer_name());
|
||||
result.layers_not_updated += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
// We didn't find any reason to keep this file, so remove it.
|
||||
debug!(
|
||||
info!(
|
||||
"garbage collecting {} is_dropped: xx is_incremental: {}",
|
||||
l.layer_name(),
|
||||
l.is_incremental(),
|
||||
@@ -5438,6 +5495,11 @@ impl Timeline {
|
||||
self.last_record_lsn.advance(new_lsn);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn force_set_disk_consistent_lsn(&self, new_value: Lsn) {
|
||||
self.disk_consistent_lsn.store(new_value);
|
||||
}
|
||||
|
||||
/// Force create an image layer and place it into the layer map.
|
||||
///
|
||||
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
|
||||
|
||||
Reference in New Issue
Block a user