From 0a14ff398261611a927854f0625cb47ee37f3c9e Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 17 Jul 2025 14:53:38 +0000 Subject: [PATCH] Revert "switch lease renewal to use lsn from the state fed by compute monitor" This reverts commit 9eb60807d883db2a0573d1d3ec4be564f7265617. --- compute_tools/src/compute.rs | 4 +-- compute_tools/src/lsn_lease.rs | 50 ++++++++++++---------------------- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 0479a44c73..b263182e8b 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -43,7 +43,7 @@ use crate::configurator::launch_configurator; use crate::disk_quota::set_disk_quota; use crate::installed_extensions::get_installed_extensions; use crate::logger::startup_context_from_env; -use crate::lsn_lease::{self, launch_lsn_lease_bg_task}; +use crate::lsn_lease::{self, launch_lsn_lease_bg_task_for_static}; use crate::metrics::COMPUTE_CTL_UP; use crate::monitor::launch_monitor; use crate::pg_helpers::*; @@ -491,7 +491,7 @@ impl ComputeNode { this.wait_spec()? }; - launch_lsn_lease_bg_task(&this); + launch_lsn_lease_bg_task_for_static(&this); // We have a spec, start the compute let mut delay_exit = false; diff --git a/compute_tools/src/lsn_lease.rs b/compute_tools/src/lsn_lease.rs index ed956d9134..d6ebce38ba 100644 --- a/compute_tools/src/lsn_lease.rs +++ b/compute_tools/src/lsn_lease.rs @@ -4,11 +4,10 @@ use std::thread; use std::time::{Duration, SystemTime}; use anyhow::{Result, bail}; -use compute_api::spec::PageserverProtocol; +use compute_api::spec::{ComputeMode, PageserverProtocol}; use itertools::Itertools as _; use pageserver_page_api as page_api; use postgres::{NoTls, SimpleQueryMessage}; -use tokio::sync::watch; use tracing::{info, warn}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; @@ -34,19 +33,23 @@ impl State { } } -/// Spawns a background thread to periodically renew LSN leases. -pub fn launch_lsn_lease_bg_task(compute: &Arc) { - let (tenant_id, timeline_id) = { +/// Spawns a background thread to periodically renew LSN leases for static compute. +/// Do nothing if the compute is not in static mode. +pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc) { + let (tenant_id, timeline_id, lsn) = { let state = compute.state.lock().unwrap(); let spec = state.pspec.as_ref().expect("Spec must be set"); - (spec.tenant_id, spec.timeline_id) + match spec.spec.mode { + ComputeMode::Static(lsn) => (spec.tenant_id, spec.timeline_id, lsn), + _ => return, + } }; let compute = compute.clone(); - let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id); + let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id, %lsn); thread::spawn(move || { let _entered = span.entered(); - if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id) { + if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) { // TODO: might need stronger error feedback than logging an warning. warn!("Exited with error: {e}"); } @@ -58,22 +61,10 @@ fn lsn_lease_bg_task( compute: Arc, tenant_id: TenantId, timeline_id: TimelineId, + lsn: Lsn, ) -> Result<()> { - let mut rx = compute.lsn_lease_state.desired_lease_lsn.subscribe(); loop { - let Some(valid_until) = - acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, &rx)? - else { - info!(current=?*rx.borrow(), "sleeping until desired_lease_lsn changes to Some()"); - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - rt.block_on(rx.wait_for(|current| current.is_some())) - .expect("the Sender is kept alive in ComputeNode, which we hold a reference to"); - info!(current=?*rx.borrow(), "waking up from sleep"); - continue; - }; + let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?; let valid_duration = valid_until .duration_since(SystemTime::now()) .unwrap_or(Duration::ZERO); @@ -92,15 +83,13 @@ fn lsn_lease_bg_task( } /// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted. -/// Returns an error if a lease is explicitly not granted. -/// Returns None if the rx is None. -/// Otherwise, we keep sending requests. +/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests. fn acquire_lsn_lease_with_retry( compute: &Arc, tenant_id: TenantId, timeline_id: TimelineId, - rx: &watch::Receiver>, -) -> Result> { + lsn: Lsn, +) -> Result { let mut attempts = 0usize; let mut retry_period_ms: f64 = 500.0; const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0; @@ -116,16 +105,11 @@ fn acquire_lsn_lease_with_retry( ) }; - // immediately copy the value so the borrow is short-lived, it's a best practice - let Some(lsn) = *rx.borrow() else { - return Ok(None); - }; - let result = try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn); match result { Ok(Some(res)) => { - return Ok(Some(res)); + return Ok(res); } Ok(None) => { bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");