mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
feat: persistent gc blocking (#8600)
Currently, we do not have facilities to persistently block GC on a tenant for whatever reason. We could do a tenant configuration update, but that is risky for generation numbers and would also be transient. Introduce a `gc_block` facility in the tenant, which manages per timeline blocking reasons. Additionally, add HTTP endpoints for enabling/disabling manual gc blocking for a specific timeline. For debugging, individual tenant status now includes a similar string representation logged when GC is skipped. Cc: #6994
This commit is contained in:
@@ -637,6 +637,13 @@ pub struct TenantInfo {
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub attachment_status: TenantAttachmentStatus,
|
||||
pub generation: u32,
|
||||
|
||||
/// Opaque explanation if gc is being blocked.
|
||||
///
|
||||
/// Only looked up for the individual tenant detail, not the listing. This is purely for
|
||||
/// debugging, not included in openapi.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_blocking: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
@@ -1427,6 +1434,7 @@ mod tests {
|
||||
current_physical_size: Some(42),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
generation: 1,
|
||||
gc_blocking: None,
|
||||
};
|
||||
let expected_active = json!({
|
||||
"id": original_active.id.to_string(),
|
||||
@@ -1449,6 +1457,7 @@ mod tests {
|
||||
current_physical_size: Some(42),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
generation: 1,
|
||||
gc_blocking: None,
|
||||
};
|
||||
let expected_broken = json!({
|
||||
"id": original_broken.id.to_string(),
|
||||
|
||||
@@ -308,6 +308,45 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
type: string
|
||||
|
||||
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/block_gc:
|
||||
parameters:
|
||||
- name: tenant_shard_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: Persistently add a gc blocking at the tenant level because of this timeline
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
|
||||
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/unblock_gc:
|
||||
parameters:
|
||||
- name: tenant_shard_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: Persistently remove a tenant level gc blocking for this timeline
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
|
||||
/v1/tenant/{tenant_shard_id}/location_config:
|
||||
parameters:
|
||||
- name: tenant_shard_id
|
||||
|
||||
@@ -935,6 +935,7 @@ async fn tenant_list_handler(
|
||||
generation: (*gen)
|
||||
.into()
|
||||
.expect("Tenants are always attached with a generation"),
|
||||
gc_blocking: None,
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>();
|
||||
|
||||
@@ -986,6 +987,7 @@ async fn tenant_status(
|
||||
.generation()
|
||||
.into()
|
||||
.expect("Tenants are always attached with a generation"),
|
||||
gc_blocking: tenant.gc_block.summary().map(|x| format!("{x:?}")),
|
||||
},
|
||||
walredo: tenant.wal_redo_manager_status(),
|
||||
timelines: tenant.list_timeline_ids(),
|
||||
@@ -1226,6 +1228,72 @@ async fn evict_timeline_layer_handler(
|
||||
}
|
||||
}
|
||||
|
||||
async fn timeline_gc_blocking_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
block_or_unblock_gc(request, true).await
|
||||
}
|
||||
|
||||
async fn timeline_gc_unblocking_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
block_or_unblock_gc(request, false).await
|
||||
}
|
||||
|
||||
/// Adding a block is `POST ../block_gc`, removing a block is `POST ../unblock_gc`.
|
||||
///
|
||||
/// Both are technically unsafe because they might fire off index uploads, thus they are POST.
|
||||
async fn block_or_unblock_gc(
|
||||
request: Request<Body>,
|
||||
block: bool,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
use crate::tenant::{
|
||||
remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized,
|
||||
};
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let state = get_state(&request);
|
||||
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
|
||||
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
|
||||
|
||||
let timeline = tenant.get_timeline(timeline_id, true)?;
|
||||
|
||||
let fut = async {
|
||||
if block {
|
||||
timeline.block_gc(&tenant).await.map(|_| ())
|
||||
} else {
|
||||
timeline.unblock_gc(&tenant).await
|
||||
}
|
||||
};
|
||||
|
||||
let span = tracing::info_span!(
|
||||
"block_or_unblock_gc",
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug(),
|
||||
timeline_id = %timeline_id,
|
||||
block = block,
|
||||
);
|
||||
|
||||
let res = fut.instrument(span).await;
|
||||
|
||||
res.map_err(|e| {
|
||||
if e.is::<NotInitialized>() || e.is::<WaitCompletionError>() {
|
||||
ApiError::ShuttingDown
|
||||
} else {
|
||||
ApiError::InternalServerError(e)
|
||||
}
|
||||
})?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
/// Get tenant_size SVG graph along with the JSON data.
|
||||
fn synthetic_size_html_response(
|
||||
inputs: ModelInputs,
|
||||
@@ -2904,6 +2972,14 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| api_handler(r, evict_timeline_layer_handler),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/block_gc",
|
||||
|r| api_handler(r, timeline_gc_blocking_handler),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/unblock_gc",
|
||||
|r| api_handler(r, timeline_gc_unblocking_handler),
|
||||
)
|
||||
.post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| {
|
||||
api_handler(r, secondary_upload_handler)
|
||||
})
|
||||
|
||||
@@ -148,6 +148,7 @@ pub(crate) mod timeline;
|
||||
|
||||
pub mod size;
|
||||
|
||||
mod gc_block;
|
||||
pub(crate) mod throttle;
|
||||
|
||||
pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
@@ -303,6 +304,12 @@ pub struct Tenant {
|
||||
/// An ongoing timeline detach must be checked during attempts to GC or compact a timeline.
|
||||
ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>,
|
||||
|
||||
/// `index_part.json` based gc blocking reason tracking.
|
||||
///
|
||||
/// New gc iterations must start a new iteration by acquiring `GcBlock::start` before
|
||||
/// proceeding.
|
||||
pub(crate) gc_block: gc_block::GcBlock,
|
||||
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
|
||||
@@ -1036,6 +1043,8 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
let mut gc_blocks = HashMap::new();
|
||||
|
||||
// For every timeline, download the metadata file, scan the local directory,
|
||||
// and build a layer map that contains an entry for each remote and local
|
||||
// layer file.
|
||||
@@ -1045,6 +1054,16 @@ impl Tenant {
|
||||
.remove(&timeline_id)
|
||||
.expect("just put it in above");
|
||||
|
||||
if let Some(blocking) = index_part.gc_blocking.as_ref() {
|
||||
// could just filter these away, but it helps while testing
|
||||
anyhow::ensure!(
|
||||
!blocking.reasons.is_empty(),
|
||||
"index_part for {timeline_id} is malformed: it should not have gc blocking with zero reasons"
|
||||
);
|
||||
let prev = gc_blocks.insert(timeline_id, blocking.reasons);
|
||||
assert!(prev.is_none());
|
||||
}
|
||||
|
||||
// TODO again handle early failure
|
||||
self.load_remote_timeline(
|
||||
timeline_id,
|
||||
@@ -1089,6 +1108,8 @@ impl Tenant {
|
||||
// IndexPart is the source of truth.
|
||||
self.clean_up_timelines(&existent_timelines)?;
|
||||
|
||||
self.gc_block.set_scanned(gc_blocks);
|
||||
|
||||
fail::fail_point!("attach-before-activate", |_| {
|
||||
anyhow::bail!("attach-before-activate");
|
||||
});
|
||||
@@ -1679,6 +1700,14 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
let _guard = match self.gc_block.start().await {
|
||||
Ok(guard) => guard,
|
||||
Err(reasons) => {
|
||||
info!("Skipping GC: {reasons}");
|
||||
return Ok(GcResult::default());
|
||||
}
|
||||
};
|
||||
|
||||
self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx)
|
||||
.await
|
||||
}
|
||||
@@ -2691,6 +2720,7 @@ impl Tenant {
|
||||
)),
|
||||
tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
|
||||
ongoing_timeline_detach: std::sync::Mutex::default(),
|
||||
gc_block: Default::default(),
|
||||
l0_flush_global_state,
|
||||
}
|
||||
}
|
||||
|
||||
213
pageserver/src/tenant/gc_block.rs
Normal file
213
pageserver/src/tenant/gc_block.rs
Normal file
@@ -0,0 +1,213 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use super::remote_timeline_client::index::GcBlockingReason;
|
||||
|
||||
type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct GcBlock {
|
||||
/// The timelines which have current reasons to block gc.
|
||||
///
|
||||
/// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done
|
||||
/// to keep the this field up to date with RemoteTimelineClient `upload_queue.dirty`.
|
||||
reasons: std::sync::Mutex<Storage>,
|
||||
blocking: tokio::sync::Mutex<()>,
|
||||
}
|
||||
|
||||
impl GcBlock {
|
||||
/// Start another gc iteration.
|
||||
///
|
||||
/// Returns a guard to be held for the duration of gc iteration to allow synchronizing with
|
||||
/// it's ending, or if not currently possible, a value describing the reasons why not.
|
||||
///
|
||||
/// Cancellation safe.
|
||||
pub(super) async fn start(&self) -> Result<Guard<'_>, BlockingReasons> {
|
||||
let reasons = {
|
||||
let g = self.reasons.lock().unwrap();
|
||||
|
||||
// TODO: the assumption is that this method gets called periodically. in prod, we use 1h, in
|
||||
// tests, we use everything. we should warn if the gc has been consecutively blocked
|
||||
// for more than 1h (within single tenant session?).
|
||||
BlockingReasons::clean_and_summarize(g)
|
||||
};
|
||||
|
||||
if let Some(reasons) = reasons {
|
||||
Err(reasons)
|
||||
} else {
|
||||
Ok(Guard {
|
||||
_inner: self.blocking.lock().await,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn summary(&self) -> Option<BlockingReasons> {
|
||||
let g = self.reasons.lock().unwrap();
|
||||
|
||||
BlockingReasons::summarize(&g)
|
||||
}
|
||||
|
||||
/// Start blocking gc for this one timeline for the given reason.
|
||||
///
|
||||
/// This is not a guard based API but instead it mimics set API. The returned future will not
|
||||
/// resolve until an existing gc round has completed.
|
||||
///
|
||||
/// Returns true if this block was new, false if gc was already blocked for this reason.
|
||||
///
|
||||
/// Cancellation safe: cancelling after first poll will keep the reason to block gc, but will
|
||||
/// keep the gc blocking reason.
|
||||
pub(crate) async fn insert(
|
||||
&self,
|
||||
timeline: &super::Timeline,
|
||||
reason: GcBlockingReason,
|
||||
) -> anyhow::Result<bool> {
|
||||
let (added, uploaded) = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
let set = g.entry(timeline.timeline_id).or_default();
|
||||
let added = set.insert(reason);
|
||||
|
||||
// LOCK ORDER: intentionally hold the lock, see self.reasons.
|
||||
let uploaded = timeline
|
||||
.remote_client
|
||||
.schedule_insert_gc_block_reason(reason)?;
|
||||
|
||||
(added, uploaded)
|
||||
};
|
||||
|
||||
uploaded.await?;
|
||||
|
||||
// ensure that any ongoing gc iteration has completed
|
||||
drop(self.blocking.lock().await);
|
||||
|
||||
Ok(added)
|
||||
}
|
||||
|
||||
/// Remove blocking gc for this one timeline and the given reason.
|
||||
pub(crate) async fn remove(
|
||||
&self,
|
||||
timeline: &super::Timeline,
|
||||
reason: GcBlockingReason,
|
||||
) -> anyhow::Result<()> {
|
||||
use std::collections::hash_map::Entry;
|
||||
|
||||
super::span::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let (remaining_blocks, uploaded) = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
match g.entry(timeline.timeline_id) {
|
||||
Entry::Occupied(mut oe) => {
|
||||
let set = oe.get_mut();
|
||||
set.remove(reason);
|
||||
if set.is_empty() {
|
||||
oe.remove();
|
||||
}
|
||||
}
|
||||
Entry::Vacant(_) => {
|
||||
// we must still do the index_part.json update regardless, in case we had earlier
|
||||
// been cancelled
|
||||
}
|
||||
}
|
||||
|
||||
let remaining_blocks = g.len();
|
||||
|
||||
// LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
|
||||
let uploaded = timeline
|
||||
.remote_client
|
||||
.schedule_remove_gc_block_reason(reason)?;
|
||||
|
||||
(remaining_blocks, uploaded)
|
||||
};
|
||||
uploaded.await?;
|
||||
|
||||
// no need to synchronize with gc iteration again
|
||||
|
||||
if remaining_blocks > 0 {
|
||||
tracing::info!(remaining_blocks, removed=?reason, "gc blocking removed, but gc remains blocked");
|
||||
} else {
|
||||
tracing::info!("gc is now unblocked for the tenant");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
|
||||
let unblocked = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
if g.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
g.remove(&timeline.timeline_id);
|
||||
|
||||
BlockingReasons::clean_and_summarize(g).is_none()
|
||||
};
|
||||
|
||||
if unblocked {
|
||||
tracing::info!("gc is now unblocked following deletion");
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize with the non-deleted timelines of this tenant.
|
||||
pub(crate) fn set_scanned(&self, scanned: Storage) {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
assert!(g.is_empty());
|
||||
g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
|
||||
|
||||
if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
|
||||
tracing::info!(summary=?reasons, "initialized with gc blocked");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct Guard<'a> {
|
||||
_inner: tokio::sync::MutexGuard<'a, ()>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct BlockingReasons {
|
||||
timelines: usize,
|
||||
reasons: enumset::EnumSet<GcBlockingReason>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BlockingReasons {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{} timelines block for {:?}",
|
||||
self.timelines, self.reasons
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockingReasons {
|
||||
fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
|
||||
let mut reasons = enumset::EnumSet::empty();
|
||||
g.retain(|_key, value| {
|
||||
reasons = reasons.union(*value);
|
||||
!value.is_empty()
|
||||
});
|
||||
if !g.is_empty() {
|
||||
Some(BlockingReasons {
|
||||
timelines: g.len(),
|
||||
reasons,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
|
||||
if g.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let reasons = g
|
||||
.values()
|
||||
.fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
|
||||
Some(BlockingReasons {
|
||||
timelines: g.len(),
|
||||
reasons,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -800,6 +800,123 @@ impl RemoteTimelineClient {
|
||||
.context("wait completion")
|
||||
}
|
||||
|
||||
/// Adds a gc blocking reason for this timeline if one does not exist already.
|
||||
///
|
||||
/// A retryable step of timeline detach ancestor.
|
||||
///
|
||||
/// Returns a future which waits until the completion of the upload.
|
||||
pub(crate) fn schedule_insert_gc_block_reason(
|
||||
self: &Arc<Self>,
|
||||
reason: index::GcBlockingReason,
|
||||
) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>
|
||||
{
|
||||
let maybe_barrier = {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
if let index::GcBlockingReason::DetachAncestor = reason {
|
||||
if upload_queue.dirty.metadata.ancestor_timeline().is_none() {
|
||||
drop(guard);
|
||||
panic!("cannot start detach ancestor if there is nothing to detach from");
|
||||
}
|
||||
}
|
||||
|
||||
let wanted = |x: Option<&index::GcBlocking>| x.is_some_and(|x| x.blocked_by(reason));
|
||||
|
||||
let current = upload_queue.dirty.gc_blocking.as_ref();
|
||||
let uploaded = upload_queue.clean.0.gc_blocking.as_ref();
|
||||
|
||||
match (current, uploaded) {
|
||||
(x, y) if wanted(x) && wanted(y) => None,
|
||||
(x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
|
||||
// Usual case: !wanted(x) && !wanted(y)
|
||||
//
|
||||
// Unusual: !wanted(x) && wanted(y) which means we have two processes waiting to
|
||||
// turn on and off some reason.
|
||||
(x, y) => {
|
||||
if !wanted(x) && wanted(y) {
|
||||
// this could be avoided by having external in-memory synchronization, like
|
||||
// timeline detach ancestor
|
||||
warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason");
|
||||
}
|
||||
|
||||
// at this point, the metadata must always show that there is a parent
|
||||
upload_queue.dirty.gc_blocking = current
|
||||
.map(|x| x.with_reason(reason))
|
||||
.or_else(|| Some(index::GcBlocking::started_now_for(reason)));
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
Some(self.schedule_barrier0(upload_queue))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(async move {
|
||||
if let Some(barrier) = maybe_barrier {
|
||||
Self::wait_completion0(barrier).await?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Removes a gc blocking reason for this timeline if one exists.
|
||||
///
|
||||
/// A retryable step of timeline detach ancestor.
|
||||
///
|
||||
/// Returns a future which waits until the completion of the upload.
|
||||
pub(crate) fn schedule_remove_gc_block_reason(
|
||||
self: &Arc<Self>,
|
||||
reason: index::GcBlockingReason,
|
||||
) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>
|
||||
{
|
||||
let maybe_barrier = {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
if let index::GcBlockingReason::DetachAncestor = reason {
|
||||
if !upload_queue
|
||||
.clean
|
||||
.0
|
||||
.lineage
|
||||
.is_detached_from_original_ancestor()
|
||||
{
|
||||
drop(guard);
|
||||
panic!("cannot complete timeline_ancestor_detach while not detached");
|
||||
}
|
||||
}
|
||||
|
||||
let wanted = |x: Option<&index::GcBlocking>| {
|
||||
x.is_none() || x.is_some_and(|b| !b.blocked_by(reason))
|
||||
};
|
||||
|
||||
let current = upload_queue.dirty.gc_blocking.as_ref();
|
||||
let uploaded = upload_queue.clean.0.gc_blocking.as_ref();
|
||||
|
||||
match (current, uploaded) {
|
||||
(x, y) if wanted(x) && wanted(y) => None,
|
||||
(x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
|
||||
(x, y) => {
|
||||
if !wanted(x) && wanted(y) {
|
||||
warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)");
|
||||
}
|
||||
|
||||
upload_queue.dirty.gc_blocking =
|
||||
current.as_ref().and_then(|x| x.without_reason(reason));
|
||||
assert!(wanted(upload_queue.dirty.gc_blocking.as_ref()));
|
||||
// FIXME: bogus ?
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
Some(self.schedule_barrier0(upload_queue))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(async move {
|
||||
if let Some(barrier) = maybe_barrier {
|
||||
Self::wait_completion0(barrier).await?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Launch an upload operation in the background; the file is added to be included in next
|
||||
/// `index_part.json` upload.
|
||||
pub(crate) fn schedule_layer_file_upload(
|
||||
|
||||
@@ -60,6 +60,9 @@ pub struct IndexPart {
|
||||
#[serde(default)]
|
||||
pub(crate) lineage: Lineage,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) gc_blocking: Option<GcBlocking>,
|
||||
|
||||
/// Describes the kind of aux files stored in the timeline.
|
||||
///
|
||||
/// The value is modified during file ingestion when the latest wanted value communicated via tenant config is applied if it is acceptable.
|
||||
@@ -101,6 +104,7 @@ impl IndexPart {
|
||||
deleted_at: None,
|
||||
archived_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
}
|
||||
}
|
||||
@@ -251,6 +255,64 @@ impl Lineage {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub(crate) struct GcBlocking {
|
||||
pub(crate) started_at: NaiveDateTime,
|
||||
pub(crate) reasons: enumset::EnumSet<GcBlockingReason>,
|
||||
}
|
||||
|
||||
#[derive(Debug, enumset::EnumSetType, serde::Serialize, serde::Deserialize)]
|
||||
#[enumset(serialize_repr = "list")]
|
||||
pub(crate) enum GcBlockingReason {
|
||||
Manual,
|
||||
DetachAncestor,
|
||||
}
|
||||
|
||||
impl GcBlocking {
|
||||
pub(super) fn started_now_for(reason: GcBlockingReason) -> Self {
|
||||
GcBlocking {
|
||||
started_at: chrono::Utc::now().naive_utc(),
|
||||
reasons: enumset::EnumSet::only(reason),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the given reason is one of the reasons why the gc is blocked.
|
||||
pub(crate) fn blocked_by(&self, reason: GcBlockingReason) -> bool {
|
||||
self.reasons.contains(reason)
|
||||
}
|
||||
|
||||
/// Returns a version of self with the given reason.
|
||||
pub(super) fn with_reason(&self, reason: GcBlockingReason) -> Self {
|
||||
assert!(!self.blocked_by(reason));
|
||||
let mut reasons = self.reasons;
|
||||
reasons.insert(reason);
|
||||
|
||||
Self {
|
||||
started_at: self.started_at,
|
||||
reasons,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a version of self without the given reason. Assumption is that if
|
||||
/// there are no more reasons, we can unblock the gc by returning `None`.
|
||||
pub(super) fn without_reason(&self, reason: GcBlockingReason) -> Option<Self> {
|
||||
assert!(self.blocked_by(reason));
|
||||
|
||||
if self.reasons.len() == 1 {
|
||||
None
|
||||
} else {
|
||||
let mut reasons = self.reasons;
|
||||
assert!(reasons.remove(reason));
|
||||
assert!(!reasons.is_empty());
|
||||
|
||||
Some(Self {
|
||||
started_at: self.started_at,
|
||||
reasons,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -292,6 +354,7 @@ mod tests {
|
||||
deleted_at: None,
|
||||
archived_at: None,
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -335,6 +398,7 @@ mod tests {
|
||||
deleted_at: None,
|
||||
archived_at: None,
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -379,6 +443,7 @@ mod tests {
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
archived_at: None,
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -426,6 +491,7 @@ mod tests {
|
||||
deleted_at: None,
|
||||
archived_at: None,
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -468,6 +534,7 @@ mod tests {
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
archived_at: None,
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -513,6 +580,7 @@ mod tests {
|
||||
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
|
||||
original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
|
||||
@@ -563,6 +631,7 @@ mod tests {
|
||||
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
|
||||
original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Some(AuxFilePolicy::V2),
|
||||
};
|
||||
|
||||
@@ -618,6 +687,7 @@ mod tests {
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
archived_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
};
|
||||
|
||||
@@ -674,6 +744,7 @@ mod tests {
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
archived_at: Some(parse_naive_datetime("2023-04-29T09:00:00.123000000")),
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
};
|
||||
|
||||
@@ -681,6 +752,68 @@ mod tests {
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v9_indexpart_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 9,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
}
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 9,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
fn parse_naive_datetime(s: &str) -> NaiveDateTime {
|
||||
chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap()
|
||||
}
|
||||
|
||||
@@ -5698,6 +5698,22 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
/// Persistently blocks gc for `Manual` reason.
|
||||
///
|
||||
/// Returns true if no such block existed before, false otherwise.
|
||||
pub(crate) async fn block_gc(&self, tenant: &super::Tenant) -> anyhow::Result<bool> {
|
||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||
tenant.gc_block.insert(self, GcBlockingReason::Manual).await
|
||||
}
|
||||
|
||||
/// Persistently unblocks gc for `Manual` reason.
|
||||
pub(crate) async fn unblock_gc(&self, tenant: &super::Tenant) -> anyhow::Result<()> {
|
||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||
tenant.gc_block.remove(self, GcBlockingReason::Manual).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn force_advance_lsn(self: &Arc<Timeline>, new_lsn: Lsn) {
|
||||
self.last_record_lsn.advance(new_lsn);
|
||||
|
||||
@@ -230,6 +230,8 @@ impl DeleteTimelineFlow {
|
||||
// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
|
||||
timeline.shutdown(super::ShutdownMode::Hard).await;
|
||||
|
||||
tenant.gc_block.before_delete(&timeline);
|
||||
|
||||
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
"failpoint: timeline-delete-before-index-deleted-at"
|
||||
|
||||
@@ -556,6 +556,22 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
assert isinstance(res_json, dict)
|
||||
return res_json
|
||||
|
||||
def timeline_block_gc(self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/block_gc",
|
||||
)
|
||||
log.info(f"Got GC request response code: {res.status_code}")
|
||||
self.verbose_error(res)
|
||||
|
||||
def timeline_unblock_gc(
|
||||
self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
|
||||
):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/unblock_gc",
|
||||
)
|
||||
log.info(f"Got GC request response code: {res.status_code}")
|
||||
self.verbose_error(res)
|
||||
|
||||
def timeline_compact(
|
||||
self,
|
||||
tenant_id: Union[TenantId, TenantShardId],
|
||||
|
||||
67
test_runner/regress/test_timeline_gc_blocking.py
Normal file
67
test_runner/regress/test_timeline_gc_blocking.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import time
|
||||
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
)
|
||||
from fixtures.pageserver.utils import wait_timeline_detail_404
|
||||
|
||||
|
||||
def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"}
|
||||
)
|
||||
ps = env.pageserver
|
||||
http = ps.http_client()
|
||||
|
||||
foo_branch = env.neon_cli.create_branch("foo", "main", env.initial_tenant)
|
||||
|
||||
gc_active_line = ".* gc_loop.*: [12] timelines need GC"
|
||||
gc_skipped_line = ".* gc_loop.*: Skipping GC: .*"
|
||||
init_gc_skipped = ".*: initialized with gc blocked.*"
|
||||
|
||||
tenant_before = http.tenant_status(env.initial_tenant)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_active_line)
|
||||
|
||||
assert ps.log_contains(gc_skipped_line, offset) is None
|
||||
|
||||
http.timeline_block_gc(env.initial_tenant, foo_branch)
|
||||
|
||||
tenant_after = http.tenant_status(env.initial_tenant)
|
||||
assert tenant_before != tenant_after
|
||||
gc_blocking = tenant_after["gc_blocking"]
|
||||
assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }"
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
|
||||
|
||||
ps.restart()
|
||||
ps.quiesce_tenants()
|
||||
|
||||
_, offset = env.pageserver.assert_log_contains(init_gc_skipped, offset)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
|
||||
|
||||
# deletion unblocks gc
|
||||
http.timeline_delete(env.initial_tenant, foo_branch)
|
||||
wait_timeline_detail_404(http, env.initial_tenant, foo_branch, 10, 1.0)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_active_line, offset)
|
||||
|
||||
http.timeline_block_gc(env.initial_tenant, env.initial_timeline)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
|
||||
|
||||
# removing the manual block also unblocks gc
|
||||
http.timeline_unblock_gc(env.initial_tenant, env.initial_timeline)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
_, offset = ps.assert_log_contains(gc_active_line, offset)
|
||||
|
||||
|
||||
def wait_for_another_gc_round():
|
||||
time.sleep(2)
|
||||
Reference in New Issue
Block a user