diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 3a4e51e697..b1566c2d6e 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -715,7 +715,7 @@ fn start_pageserver(
             disk_usage_eviction_state,
             deletion_queue.new_client(),
             secondary_controller,
-            feature_resolver,
+            feature_resolver.clone(),
         )
         .context("Failed to initialize router state")?,
     );
@@ -841,6 +841,7 @@ fn start_pageserver(
         } else {
             None
         },
+        feature_resolver.clone(),
     );
 
     // Spawn a Pageserver gRPC server task. It will spawn separate tasks for each request/stream.
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index 77029cdeb6..a0998a7598 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -68,6 +68,7 @@ use crate::config::PageServerConf;
 use crate::context::{
     DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
 };
+use crate::feature_resolver::FeatureResolver;
 use crate::metrics::{
     self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, GetPageBatchBreakReason, LIVE_CONNECTIONS,
     MISROUTED_PAGESTREAM_REQUESTS, PAGESTREAM_HANDLER_RESULTS_TOTAL, SmgrOpTimer, TimelineMetrics,
@@ -139,6 +140,7 @@ pub fn spawn(
     perf_trace_dispatch: Option<Dispatch>,
     tcp_listener: tokio::net::TcpListener,
     tls_config: Option<Arc<rustls::ServerConfig>>,
+    feature_resolver: FeatureResolver,
 ) -> Listener {
     let cancel = CancellationToken::new();
     let libpq_ctx = RequestContext::todo_child(
@@ -160,6 +162,7 @@ pub fn spawn(
             conf.pg_auth_type,
             tls_config,
             conf.page_service_pipelining.clone(),
+            feature_resolver,
             libpq_ctx,
             cancel.clone(),
         )
@@ -218,6 +221,7 @@ pub async fn libpq_listener_main(
     auth_type: AuthType,
     tls_config: Option<Arc<rustls::ServerConfig>>,
     pipelining_config: PageServicePipeliningConfig,
+    feature_resolver: FeatureResolver,
     listener_ctx: RequestContext,
     listener_cancel: CancellationToken,
 ) -> Connections {
@@ -261,6 +265,7 @@ pub async fn libpq_listener_main(
                     auth_type,
                     tls_config.clone(),
                     pipelining_config.clone(),
+                    feature_resolver.clone(),
                     connection_ctx,
                     connections_cancel.child_token(),
                     gate_guard,
@@ -303,6 +308,7 @@ async fn page_service_conn_main(
     auth_type: AuthType,
     tls_config: Option<Arc<rustls::ServerConfig>>,
     pipelining_config: PageServicePipeliningConfig,
+    feature_resolver: FeatureResolver,
     connection_ctx: RequestContext,
     cancel: CancellationToken,
     gate_guard: GateGuard,
@@ -370,6 +376,7 @@ async fn page_service_conn_main(
         perf_span_fields,
         connection_ctx,
         cancel.clone(),
+        feature_resolver.clone(),
         gate_guard,
     );
     let pgbackend =
@@ -421,6 +428,8 @@ struct PageServerHandler {
     pipelining_config: PageServicePipeliningConfig,
     get_vectored_concurrent_io: GetVectoredConcurrentIo,
 
+    feature_resolver: FeatureResolver,
+
     gate_guard: GateGuard,
 }
 
@@ -587,6 +596,15 @@ impl timeline::handle::TenantManager<TenantManagerTypes> for TenantManagerWrappe
     }
 }
 
+/// Whether to hold the applied GC cutoff guard when processing GetPage requests.
+/// This is determined once at the start of pagestream subprotocol handling based on
+/// feature flags, configuration, and test conditions.
+#[derive(Debug, Clone, Copy)]
+enum HoldAppliedGcCutoffGuard {
+    Yes,
+    No,
+}
+
 #[derive(thiserror::Error, Debug)]
 enum PageStreamError {
     /// We encountered an error that should prompt the client to reconnect:
@@ -730,6 +748,7 @@ enum BatchedFeMessage {
     GetPage {
         span: Span,
         shard: WeakHandle<TenantManagerTypes>,
+        applied_gc_cutoff_guard: Option<RcuReadGuard<Lsn>>,
         pages: SmallVec<[BatchedGetPageRequest; 1]>,
         batch_break_reason: GetPageBatchBreakReason,
     },
@@ -909,6 +928,7 @@ impl PageServerHandler {
         perf_span_fields: ConnectionPerfSpanFields,
         connection_ctx: RequestContext,
         cancel: CancellationToken,
+        feature_resolver: FeatureResolver,
         gate_guard: GateGuard,
     ) -> Self {
         PageServerHandler {
@@ -920,6 +940,7 @@ impl PageServerHandler {
             cancel,
             pipelining_config,
             get_vectored_concurrent_io,
+            feature_resolver,
             gate_guard,
         }
     }
@@ -959,6 +980,7 @@ impl PageServerHandler {
         ctx: &RequestContext,
         protocol_version: PagestreamProtocolVersion,
         parent_span: Span,
+        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,
     ) -> Result<Option<BatchedFeMessage>, QueryError>
     where
         IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,
@@ -1196,19 +1218,27 @@ impl PageServerHandler {
                 })
                 .await?;
 
+                let applied_gc_cutoff_guard = shard.get_applied_gc_cutoff_lsn(); // hold guard
                 // We're holding the Handle
                 let effective_lsn = match Self::effective_request_lsn(
                     &shard,
                     shard.get_last_record_lsn(),
                     req.hdr.request_lsn,
                     req.hdr.not_modified_since,
-                    &shard.get_applied_gc_cutoff_lsn(),
+                    &applied_gc_cutoff_guard,
                 ) {
                     Ok(lsn) => lsn,
                     Err(e) => {
                         return respond_error!(span, e);
                     }
                 };
+                let applied_gc_cutoff_guard = match hold_gc_cutoff_guard {
+                    HoldAppliedGcCutoffGuard::Yes => Some(applied_gc_cutoff_guard),
+                    HoldAppliedGcCutoffGuard::No => {
+                        drop(applied_gc_cutoff_guard);
+                        None
+                    }
+                };
 
                 let batch_wait_ctx = if ctx.has_perf_span() {
                     Some(
@@ -1229,6 +1259,7 @@ impl PageServerHandler {
                 BatchedFeMessage::GetPage {
                     span,
                     shard: shard.downgrade(),
+                    applied_gc_cutoff_guard,
                     pages: smallvec![BatchedGetPageRequest {
                         req,
                         timer,
@@ -1329,13 +1360,28 @@ impl PageServerHandler {
         match (eligible_batch, this_msg) {
             (
                 BatchedFeMessage::GetPage {
-                    pages: accum_pages, ..
+                    pages: accum_pages,
+                    applied_gc_cutoff_guard: accum_applied_gc_cutoff_guard,
+                    ..
                 },
                 BatchedFeMessage::GetPage {
-                    pages: this_pages, ..
+                    pages: this_pages,
+                    applied_gc_cutoff_guard: this_applied_gc_cutoff_guard,
+                    ..
                 },
             ) => {
                 accum_pages.extend(this_pages);
+                // the minimum of the two guards will keep data for both alive
+                match (&accum_applied_gc_cutoff_guard, this_applied_gc_cutoff_guard) {
+                    (None, None) => (),
+                    (None, Some(this)) => *accum_applied_gc_cutoff_guard = Some(this),
+                    (Some(_), None) => (),
+                    (Some(accum), Some(this)) => {
+                        if **accum > *this {
+                            *accum_applied_gc_cutoff_guard = Some(this);
+                        }
+                    }
+                };
                 Ok(())
             }
             #[cfg(feature = "testing")]
@@ -1650,6 +1696,7 @@ impl PageServerHandler {
             BatchedFeMessage::GetPage {
                 span,
                 shard,
+                applied_gc_cutoff_guard,
                 pages,
                 batch_break_reason,
             } => {
@@ -1669,6 +1716,7 @@ impl PageServerHandler {
                             .instrument(span.clone())
                             .await;
                         assert_eq!(res.len(), npages);
+                        drop(applied_gc_cutoff_guard);
                         res
                     },
                     span,
@@ -1750,7 +1798,7 @@ impl PageServerHandler {
     /// Coding discipline within this function: all interaction with the `pgb` connection
     /// needs to be sensitive to connection shutdown, currently signalled via [`Self::cancel`].
     /// This is so that we can shutdown page_service quickly.
-    #[instrument(skip_all)]
+    #[instrument(skip_all, fields(hold_gc_cutoff_guard))]
     async fn handle_pagerequests(
         &mut self,
         pgb: &mut PostgresBackend<IO>,
@@ -1796,6 +1844,30 @@ impl PageServerHandler {
             .take()
             .expect("implementation error: timeline_handles should not be locked");
 
+        // Evaluate the expensive feature resolver check once per pagestream subprotocol handling
+        // instead of once per GetPage request. This is shared between pipelined and serial paths.
+        let hold_gc_cutoff_guard = if cfg!(test) || cfg!(feature = "testing") {
+            HoldAppliedGcCutoffGuard::Yes
+        } else {
+            // Use the global feature resolver with the tenant ID directly, avoiding the need
+            // to get a timeline/shard which might not be available on this pageserver node.
+            let empty_properties = std::collections::HashMap::new();
+            match self.feature_resolver.evaluate_boolean(
+                "page-service-getpage-hold-applied-gc-cutoff-guard",
+                tenant_id,
+                &empty_properties,
+            ) {
+                Ok(()) => HoldAppliedGcCutoffGuard::Yes,
+                Err(_) => HoldAppliedGcCutoffGuard::No,
+            }
+        };
+        // record it in the span of handle_pagerequests so that both the request_span
+        // and the pipeline implementation spans contain the field.
+        Span::current().record(
+            "hold_gc_cutoff_guard",
+            tracing::field::debug(&hold_gc_cutoff_guard),
+        );
+
         let request_span = info_span!("request");
         let ((pgb_reader, timeline_handles), result) = match self.pipelining_config.clone() {
             PageServicePipeliningConfig::Pipelined(pipelining_config) => {
@@ -1809,6 +1881,7 @@ impl PageServerHandler {
                     pipelining_config,
                     protocol_version,
                     io_concurrency,
+                    hold_gc_cutoff_guard,
                     &ctx,
                 )
                 .await
@@ -1823,6 +1896,7 @@ impl PageServerHandler {
                     request_span,
                     protocol_version,
                     io_concurrency,
+                    hold_gc_cutoff_guard,
                     &ctx,
                 )
                 .await
@@ -1851,6 +1925,7 @@ impl PageServerHandler {
         request_span: Span,
         protocol_version: PagestreamProtocolVersion,
         io_concurrency: IoConcurrency,
+        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,
         ctx: &RequestContext,
     ) -> (
         (PostgresBackendReader<IO>, TimelineHandles),
@@ -1872,6 +1947,7 @@ impl PageServerHandler {
                 ctx,
                 protocol_version,
                 request_span.clone(),
+                hold_gc_cutoff_guard,
             )
             .await;
             let msg = match msg {
@@ -1919,6 +1995,7 @@ impl PageServerHandler {
         pipelining_config: PageServicePipeliningConfigPipelined,
         protocol_version: PagestreamProtocolVersion,
         io_concurrency: IoConcurrency,
+        hold_gc_cutoff_guard: HoldAppliedGcCutoffGuard,
         ctx: &RequestContext,
     ) -> (
         (PostgresBackendReader<IO>, TimelineHandles),
@@ -2022,6 +2099,7 @@ impl PageServerHandler {
                         &ctx,
                         protocol_version,
                         request_span.clone(),
+                        hold_gc_cutoff_guard,
                     )
                     .await;
                     let Some(read_res) = read_res.transpose() else {
@@ -2068,6 +2146,7 @@ impl PageServerHandler {
                         pages,
                         span: _,
                         shard: _,
+                        applied_gc_cutoff_guard: _,
                         batch_break_reason: _,
                     } = &mut batch
                     {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 483e9d9a2a..2c70c5cfa5 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -70,7 +70,7 @@ use tracing::*;
 use utils::generation::Generation;
 use utils::guard_arc_swap::GuardArcSwap;
 use utils::id::TimelineId;
-use utils::logging::{MonitorSlowFutureCallback, monitor_slow_future};
+use utils::logging::{MonitorSlowFutureCallback, log_slow, monitor_slow_future};
 use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
 use utils::postgres_client::PostgresClientProtocol;
 use utils::rate_limit::RateLimit;
@@ -6898,7 +6898,13 @@ impl Timeline {
             write_guard.store_and_unlock(new_gc_cutoff)
         };
 
-        waitlist.wait().await;
+        let waitlist_wait_fut = std::pin::pin!(waitlist.wait());
+        log_slow(
+            "applied_gc_cutoff waitlist wait",
+            Duration::from_secs(30),
+            waitlist_wait_fut,
+        )
+        .await;
 
         info!("GC starting");
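
Not part of the patch above: a minimal, stand-alone sketch of the "decide once per connection" pattern that `handle_pagerequests` now uses. The flag is evaluated a single time when the pagestream subprotocol starts, and the resulting `Copy` enum is passed by value into both the serial and pipelined request paths. `FlagResolver`, `decide`, and the string tenant ID below are made-up stand-ins for illustration, not the pageserver's `FeatureResolver` API.

```rust
/// Mirrors the HoldAppliedGcCutoffGuard enum added in page_service.rs.
#[derive(Debug, Clone, Copy)]
enum HoldAppliedGcCutoffGuard {
    Yes,
    No,
}

/// Hypothetical stand-in for a feature-flag service: Ok(()) means "flag is on for this tenant".
struct FlagResolver;

impl FlagResolver {
    fn evaluate_boolean(&self, _flag: &str, _tenant_id: &str) -> Result<(), String> {
        Ok(())
    }
}

/// Evaluate the flag once; every GetPage request on the connection reuses the result.
fn decide(resolver: &FlagResolver, tenant_id: &str, testing: bool) -> HoldAppliedGcCutoffGuard {
    if testing {
        // Test builds always exercise the guard-holding path.
        return HoldAppliedGcCutoffGuard::Yes;
    }
    match resolver.evaluate_boolean(
        "page-service-getpage-hold-applied-gc-cutoff-guard",
        tenant_id,
    ) {
        Ok(()) => HoldAppliedGcCutoffGuard::Yes,
        Err(_) => HoldAppliedGcCutoffGuard::No,
    }
}

fn main() {
    let decision = decide(&FlagResolver, "tenant-a", false);
    println!("hold_gc_cutoff_guard = {decision:?}");
}
```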
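Also not part of the patch: a simplified model of the batching rule introduced in the `BatchedFeMessage::GetPage` merge arm. When two batches are combined, the code keeps whichever applied-GC-cutoff guard pins the lower LSN, since the lower cutoff keeps every page of the combined batch readable. `Guard` and `Lsn` below are toy types standing in for `RcuReadGuard<Lsn>` and the real LSN type; only the merge rule itself is taken from the diff.

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Lsn(u64);

/// Toy guard: pretend that, while alive, it prevents GC past its cutoff LSN.
#[derive(Debug)]
struct Guard(Lsn);

impl std::ops::Deref for Guard {
    type Target = Lsn;
    fn deref(&self) -> &Lsn {
        &self.0
    }
}

/// Merge `this` into `accum`, keeping the guard with the smaller (more permissive) cutoff.
fn merge_guards(accum: &mut Option<Guard>, this: Option<Guard>) {
    match (&accum, this) {
        (None, None) => (),
        (None, Some(this)) => *accum = Some(this),
        (Some(_), None) => (),
        (Some(kept), Some(this)) => {
            if **kept > *this {
                // The incoming batch holds an older cutoff; it covers both batches.
                *accum = Some(this);
            }
        }
    }
}

fn main() {
    let mut accum = Some(Guard(Lsn(100)));
    merge_guards(&mut accum, Some(Guard(Lsn(80))));
    assert_eq!(**accum.as_ref().unwrap(), Lsn(80)); // the lower cutoff wins
    merge_guards(&mut accum, None);
    assert_eq!(**accum.as_ref().unwrap(), Lsn(80)); // a batch without a guard changes nothing
    println!("merged cutoff guard: {accum:?}");
}
```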
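Finally, the timeline.rs hunk wraps the GC waitlist wait in `utils::logging::log_slow` with a 30-second threshold, presumably so a stuck wait becomes visible in the logs. The sketch below is not that helper; it is a minimal, assumed-for-illustration equivalent built on `tokio::time::timeout` (it logs once instead of periodically, uses `eprintln!` instead of `tracing`, and assumes a tokio dependency with the `time`, `macros`, and `rt` features).

```rust
use std::time::Duration;

/// Await `fut`, but print a warning if it is still pending after `threshold`.
/// Hypothetical helper; the pageserver's log_slow has its own behavior and signature.
async fn log_if_slow<F: std::future::Future>(
    name: &str,
    threshold: Duration,
    fut: F,
) -> F::Output {
    let mut fut = std::pin::pin!(fut);
    match tokio::time::timeout(threshold, fut.as_mut()).await {
        Ok(output) => output,
        Err(_elapsed) => {
            // Still pending after the threshold: log once, then keep waiting.
            eprintln!("{name} has been running for more than {threshold:?}");
            let output = fut.await;
            eprintln!("{name} finally completed");
            output
        }
    }
}

#[tokio::main]
async fn main() {
    // Simulate a waitlist wait that exceeds the (shortened) threshold.
    let slow_wait = tokio::time::sleep(Duration::from_millis(200));
    log_if_slow(
        "applied_gc_cutoff waitlist wait",
        Duration::from_millis(50),
        slow_wait,
    )
    .await;
    println!("GC starting");
}
```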